In [1]:
import os
import sys
import tensorflow as tf
import numpy as np
import argparse
from datetime import datetime

from dataset import Dataset
from trainer import MatchingModelTrainer
from preprocessor import Preprocessor
from utils.dirs import create_dirs
from utils.logger import SummaryWriter
from utils.config import load_config, save_config
from models.base import get_model
from utils.utils import JamoProcessor

# Wall-clock timestamp taken when this cell runs.
# NOTE(review): `now` is not read by any later cell visible in this notebook — confirm it is still needed.
now = datetime.now()

In [2]:
# Restrict CUDA to device index 0 for this kernel process.
# NOTE(review): this runs after `import tensorflow`; it presumably still takes
# effect because the session is only created in a later cell — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [3]:
class Config:
    """Experiment settings consumed by the rest of this notebook.

    Each setting is stored as a plain instance attribute, so it can be read as
    ``config.<name>`` and enumerated with ``vars(config)``. Attributes are
    created in the order they are listed in ``__init__``.
    """

    def __init__(self):
        settings = {
            # --- run identity, data locations, tokenizer artefacts ---
            "mode": "train",
            "name": "debug2000",
            "config": "",
            "train_dir": "/media/scatter/scatterdisk/reply_matching_model/sol.tokenized.sent_piece_100K/",
            "val_dir": "/media/scatter/scatterdisk/reply_matching_model/sol.tokenized.sent_piece_100K/sol.validation.txt",
            "pretrained_embed_dir": "/media/scatter/scatterdisk/pretrained_embedding/fasttext.sent_piece_100K.256D",
            "checkpoint_dir": "/home/angrypark/",
            "model": "DualEncoderTransformerDense2",
            # NOTE(review): this path names a 50K sentence-piece model while the
            # data, embedding and vocab paths all say sent_piece_100K — confirm
            # the mismatch is intentional.
            "sent_piece_model": "/media/scatter/scatterdisk/tokenizer/sent_piece.50K.model",
            "soynlp_scores": "/media/scatter/scatterdisk/tokenizer/soynlp_scores.sol.100M.txt",
            "normalizer": "DummyNormalizer",
            "tokenizer": "DummyTokenizer",
            "vocab_size": 90000,
            "vocab_list": "/media/scatter/scatterdisk/pretrained_embedding/vocab_list.sent_piece_100K.txt",

            # --- embedding / optimisation / sequence-length settings ---
            "embed_dim": 256,
            "learning_rate": 1e-1,
            "min_length": 1,
            "max_length": 20,
            "embed_dropout_keep_prob": 0.9,
            "lstm_dropout_keep_prob": 0.9,

            # --- encoder size and negative sampling ---
            "lstm_dim": 512,
            "negative_sampling": "random",
            "num_negative_samples": 4,
            "add_echo": False,

            # --- training-loop schedule ---
            "batch_size": 512,
            "num_epochs": 300,
            "evaluate_every": 100000,
            "save_every": 1000000,

            # --- checkpoint retention and input shuffling ---
            "max_to_keep": 1,
            "shuffle": True,

            # --- convolution / dense layer settings ---
            "filter_sizes": "2,3",
            "num_filters": 64,
            "num_hidden": 128,
            "hidden_dropout_keep_prob": 0.9,
            "dense_dropout_keep_prob": 0.9,

            # --- loss options ---
            "weak_supervision": False,
            "hinge_loss": 0.3,
        }
        # Materialise every entry as an instance attribute, in listing order.
        for attr_name, attr_value in settings.items():
            setattr(self, attr_name, attr_value)


# Single shared settings object used by the cells below.
config = Config()

In [4]:
# `config` is rebound to whatever create_dirs returns.
# NOTE(review): the log printed later shows the checkpoint path as
# /home/angrypark/debug2000/, so create_dirs presumably extends
# config.checkpoint_dir with config.name — confirm, and check whether
# re-running this cell would extend the path a second time.
config = create_dirs(config)
# Session options: with allow_growth the GPU memory region is extended as
# needed rather than claimed in full when the session starts.
device_config = tf.ConfigProto()
device_config.gpu_options.allow_growth = True
sess = tf.Session(config=device_config)

In [5]:
# Build the text preprocessor from the notebook settings; it is passed to both
# the Dataset and the trainer below.
preprocessor = Preprocessor(config)

In [6]:
# Input pipeline. All arguments except `debug` are passed positionally, so
# their order must match the Dataset constructor's signature (not visible
# from this notebook — verify against dataset.py before reordering).
data = Dataset(preprocessor, 
               config.train_dir, 
               config.val_dir, 
               config.min_length, 
               config.max_length, 
               config.num_negative_samples,
               config.batch_size, 
               config.shuffle, 
               config.num_epochs, 
               debug=False)
# Summary writer and trainer both share the session created earlier.
summary_writer = SummaryWriter(sess, config)
trainer = MatchingModelTrainer(sess, preprocessor, data, config, summary_writer)

In [7]:
# Overwrite the dataset sizes and the trainer's step count with a fixed debug
# value. The count is named once so the three assignments cannot drift apart.
DEBUG_NUM_EXAMPLES = 10000

data.train_size = DEBUG_NUM_EXAMPLES
data.val_size = DEBUG_NUM_EXAMPLES
# Ceiling division: batches needed to cover DEBUG_NUM_EXAMPLES examples.
trainer.num_steps_per_epoch = (DEBUG_NUM_EXAMPLES - 1) // config.batch_size + 1

In [8]:
# Build the training graph. Note that `sess` is rebound here to the session
# returned by the trainer, replacing the name bound in the session-setup cell.
model, sess = trainer.build_graph()

[32m[21:56:46][INFO] Building train graph... [0m


INFO:tensorflow:Base learning rate: 2.000000
Pre-trained embedding loaded. Number of OOV : 5272 / 90000
INFO:tensorflow:Setting T2TModel mode to 'train'
:::MLPv0.5.0 transformer 1541163411.067282438 (/home/angrypark/envs/angryenv/lib/python3.6/site-packages/tensor2tensor/utils/t2t_model.py:213) model_hp_initializer_gain: 1.0
INFO:tensorflow:Using variable initializer: uniform_unit_scaling
INFO:tensorflow:Building model body
:::MLPv0.5.0 transformer 1541163411.177893639 (/home/angrypark/envs/angryenv/lib/python3.6/site-packages/tensor2tensor/models/transformer.py:1106) model_hp_hidden_layers: 2
:::MLPv0.5.0 transformer 1541163411.187114000 (/home/angrypark/envs/angryenv/lib/python3.6/site-packages/tensor2tensor/models/transformer.py:1106) model_hp_attention_num_heads: 4
:::MLPv0.5.0 transformer 1541163411.195665598 (/home/angrypark/envs/angryenv/lib/python3.6/site-packages/tensor2tensor/models/transformer.py:1106) model_hp_attention_dropout: 0.1
:::MLPv0.5.0 transformer 1541163411.53033

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
[32m[21:56:59][INFO] Loading checkpoint from /home/angrypark/debug2000/ [0m
[31m[21:56:59][ERROR] No checkpoint found in /home/angrypark/debug2000/ [0m


In [9]:
# Feeds for a single inspection pass: every dropout keep-probability is set
# to 1 and the negative-sample count is taken from the notebook settings.
feed_dict = dict([
    (model.lstm_dropout_keep_prob, 1),
    (model.num_negative_samples, config.num_negative_samples),
    (model.embed_dropout_keep_prob, 1),
    (model.dense_dropout_keep_prob, 1),
])

if config.weak_supervision:
    # Run the trainer's inference model first, then feed its outputs
    # (inputs, lengths and distances) into the training model's placeholders.
    (input_queries,
     input_replies,
     query_lengths,
     reply_lengths,
     weak_distances) = trainer.infer_sess.run(
        [
            trainer.infer_model.input_queries,
            trainer.infer_model.input_replies,
            trainer.infer_model.queries_lengths,
            trainer.infer_model.replies_lengths,
            trainer.infer_model.distances,
        ],
        feed_dict={
            trainer.infer_model.dropout_keep_prob: 1,
            trainer.infer_model.add_echo: False,
        },
    )
    # All five keys are resolved before any of them is written to feed_dict.
    feed_dict.update([
        (model.input_queries, input_queries),
        (model.input_replies, input_replies),
        (model.query_lengths, query_lengths),
        (model.reply_lengths, reply_lengths),
        (model.weak_distances, weak_distances),
    ])

In [10]:
# One session run that pulls out each intermediate tensor of the model
# (embedded -> encoded -> pooled -> flattened) together with logits and labels.
# The order of the targets must stay aligned with the order of the fetch list.
(
    queries_embedded,
    replies_embedded,
    queries_encoded,
    replies_encoded,
    queries_pooled,
    replies_pooled,
    queries_flattened,
    replies_flattened,
    logits,
    labels,
) = sess.run(
    [
        model.queries_embedded,
        model.replies_embedded,
        model.queries_encoded,
        model.replies_encoded,
        model.queries_pooled,
        model.replies_pooled,
        model.queries_flattened,
        model.replies_flattened,
        model.logits,
        model.labels,
    ],
    feed_dict=feed_dict,
)

In [11]:
# Shape of the array fetched from model.queries_encoded.
queries_encoded.shape

(512, 20, 1, 256)

In [12]:
# Shape of the array fetched from model.queries_embedded.
queries_embedded.shape

(512, 20, 256)

In [13]:
# NOTE(review): this repeats the expression of the preceding cell; possibly
# `replies_embedded.shape` was intended here — confirm or remove the cell.
queries_embedded.shape

(512, 20, 256)

In [14]:
# Shape of the array fetched from model.queries_pooled.
queries_pooled.shape

(512, 1, 1, 256)

In [15]:
# Shape of the array fetched from model.logits.
logits.shape

(2555, 1)

In [16]:
# Shape of the array fetched from model.labels (compare with logits.shape).
labels.shape

(2555, 1)

In [17]:
# Display the raw logit values (NumPy abbreviates long arrays in the repr).
logits

array([[847.7907  ],
       [622.5545  ],
       [396.15997 ],
       ...,
       [427.43973 ],
       [-81.023415],
       [380.96375 ]], dtype=float32)

In [18]:
import numpy as np

In [19]:
import numpy as np
def sigmoid(x, derivative=False):
    """Logistic sigmoid ``1 / (1 + exp(-x))``, evaluated without overflow.

    Args:
        x: Scalar or array-like of real values. When ``derivative`` is True,
            ``x`` is used as-is and is expected to already be a sigmoid
            *output* (not a pre-activation).
        derivative: When True, return ``x * (1 - x)``, i.e. the sigmoid's
            derivative written in terms of its output value, instead of
            applying the sigmoid itself.

    Returns:
        A NumPy scalar for scalar input, otherwise an array with the same
        shape as ``x``. Floating-point input keeps its dtype.
    """
    if derivative:
        return x * (1 - x)

    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow, whereas the
    # naive exp(-x) overflows (and emits a RuntimeWarning) for large negative x.
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + exp(-x)).  For x < 0 the same value, rewritten as
    # exp(x) / (1 + exp(x)) so that only the bounded term is used.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a NumPy scalar and leaves
    # arrays of higher rank unchanged.
    return result[()]

In [20]:
# Boolean predictions: True where the sigmoid of the logit exceeds 0.5.
# NOTE(review): up to floating-point rounding near zero this is the same test
# as `logits > 0`, which would avoid evaluating the exponential — consider it.
predictions = sigmoid(logits)>0.5

  This is separate from the ipykernel package so we can avoid doing imports until


In [21]:
# Fraction of entries where the thresholded prediction equals the label.
(predictions == labels).mean()

0.225440313111546