# 아래 `NAME`(평가할 run 이름)만 바꿔서 실행하면 됩니다

In [14]:
import tensorflow as tf

In [15]:
import os
import sys
import tensorflow as tf
import numpy as np
import argparse
from datetime import datetime

from dataset import Dataset
from trainer import MatchingModelTrainer
from preprocessor import Preprocessor
from utils.dirs import create_dirs
from utils.logger import SummaryWriter
from utils.config import load_config, save_config
from models.base import get_model
from utils.utils import JamoProcessor

In [16]:
# Name of the training run to evaluate; it is substituted into the run
# directory path from which the config and checkpoint are loaded.
NAME = "new_delstm_nsrandom4echo_lr1e-3"
# Tokenizer name written into the inference config's `tokenizer` attribute.
TOKENIZER = "SentencePieceTokenizer"

In [17]:
# Everything belonging to a run is stored under a directory named after it.
base_dir = "/media/scatter/scatterdisk/reply_matching_model/runs/{}/".format(NAME)
# Despite the `_dir` suffix, the two names below are file paths inside that directory.
config_dir = os.path.join(base_dir, "config.json")
best_model_dir = os.path.join(base_dir, "best_loss/best_loss.ckpt")
# Alternative checkpoint (kept for reference): base_dir + "model.ckpt"

In [18]:
# Config as saved by the training run; it drives the Dataset and model construction.
model_config = load_config(config_dir)
preprocessor = Preprocessor(model_config)

# Load the config a second time and override the tokenizer settings for inference.
infer_config = load_config(config_dir)
infer_config.tokenizer = TOKENIZER
infer_config.soynlp_scores = "/media/scatter/scatterdisk/tokenizer/soynlp_scores.sol.100M.txt"
infer_preprocessor = Preprocessor(infer_config)
# NOTE(review): only the inference preprocessor is built here; `preprocessor`
# is used without a build_preprocessor() call — confirm that is intended.
infer_preprocessor.build_preprocessor()

In [19]:
# Build the model in its own graph so it does not share state with the default graph.
graph = tf.Graph()
tf_config = tf.ConfigProto()
# Let TensorFlow grow its GPU memory usage on demand (allow_growth option).
tf_config.gpu_options.allow_growth = True

with graph.as_default():
    # Resolve the model class from the name stored in the run's config.
    Model = get_model(model_config.model)
    # The Dataset is built from the training-time config values; it is passed
    # to the model constructor below.
    data = Dataset(preprocessor, 
               model_config.train_dir, 
               model_config.val_dir, 
               model_config.min_length, 
               model_config.max_length, 
               model_config.batch_size, 
               model_config.shuffle, 
               model_config.num_epochs, 
               debug=False)
    infer_model = Model(data, model_config)
    infer_sess = tf.Session(config=tf_config, graph=graph)
    # Initialize all variables first; the load() call below then restores the
    # checkpoint (the recorded log shows parameters restored from best_loss.ckpt).
    infer_sess.run(tf.global_variables_initializer())
    infer_sess.run(tf.local_variables_initializer())

# Restore the weights of the best-loss checkpoint into the session.
infer_model.load(infer_sess, model_dir=best_model_dir)

Pre-trained embedding loaded. Number of OOV : 5272 / 90000


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Restoring parameters from /media/scatter/scatterdisk/reply_matching_model/runs/new_delstm_nsrandom4echo_lr1e-3/best_loss/best_loss.ckpt


In [20]:
def load_test_data(preprocessor, base_dir="/home/angrypark/reply_matching_model/data/"):
    """Read the test queries, candidate replies and labels, and preprocess the text.

    Three line-aligned files are read from ``base_dir``:

    * ``test_queries.txt`` - one query per line,
    * ``test_replies.txt`` - tab-separated candidate replies for that query,
    * ``test_labels.txt``  - tab-separated integer labels, one per candidate.

    Args:
        preprocessor: object whose ``preprocess(text)`` returns a
            ``(processed_text, length)`` pair.
        base_dir: directory containing the three files. Defaults to the
            location the notebook originally hard-coded.

    Returns:
        ``(test_queries, test_replies, test_queries_lengths,
        test_replies_lengths, test_labels)`` where the query items are tuples
        with one entry per query, the reply items are lists holding one tuple
        per query, and ``test_labels`` is a list of lists of ints.

    Raises:
        ValueError: if the files are empty or do not have the same number of
            lines (which would silently misalign queries, replies and labels).
    """
    def _read_lines(filename):
        # The data is Korean text: decode explicitly as UTF-8 instead of
        # relying on the platform's default encoding.
        with open(os.path.join(base_dir, filename), "r", encoding="utf-8") as f:
            return [line.strip() for line in f]

    raw_queries = _read_lines("test_queries.txt")
    replies_set = [line.split("\t") for line in _read_lines("test_replies.txt")]
    test_labels = [[int(y) for y in line.split("\t")]
                   for line in _read_lines("test_labels.txt")]

    if not raw_queries:
        raise ValueError("no test queries found in {}".format(base_dir))
    if not (len(raw_queries) == len(replies_set) == len(test_labels)):
        raise ValueError(
            "test files are not line-aligned: {} queries, {} reply rows, {} label rows".format(
                len(raw_queries), len(replies_set), len(test_labels)))

    # preprocess() yields (processed, length) pairs; zip(*) separates the two columns.
    test_queries, test_queries_lengths = zip(*[preprocessor.preprocess(query)
                                               for query in raw_queries])
    test_replies = list()
    test_replies_lengths = list()
    for replies in replies_set:
        processed, lengths = zip(*[preprocessor.preprocess(reply) for reply in replies])
        test_replies.append(processed)
        test_replies_lengths.append(lengths)
    return test_queries, test_replies, test_queries_lengths, test_replies_lengths, test_labels

In [21]:
def test(model, sess, preprocessor):
    test_queries, test_replies, test_queries_lengths, \
    test_replies_lengths, test_labels = load_test_data(preprocessor)

    # flatten
    row, col, _ = np.shape(test_replies)
    test_queries_expanded = [[q]*col for q in test_queries]
    test_queries_expanded = [y for x in test_queries_expanded for y in x]
    test_queries_lengths_expanded = [[l]*col for l in test_queries_lengths]
    test_queries_lengths_expanded = [y for x in test_queries_lengths_expanded for y in x]
    test_replies = [y for x in test_replies for y in x]
    test_replies_lengths = [y for x in test_replies_lengths for y in x]

    feed_dict = {model.input_queries: test_queries_expanded,
                 model.input_replies: test_replies,
                 model.query_lengths: test_queries_lengths_expanded,
                 model.reply_lengths: test_replies_lengths, 
                 model.embed_dropout_keep_prob: 1, 
                 model.lstm_dropout_keep_prob: 1}
    probs = sess.run(model.positive_probs, feed_dict=feed_dict)
    probs = np.reshape(probs, [row, col])
    return test_labels, probs.tolist()

In [22]:
# Score the test set with the restored model, using the inference preprocessor
# (the one configured with the overridden tokenizer settings).
y_true, y_prob = test(infer_model, infer_sess, infer_preprocessor)

In [23]:
from sklearn.metrics import precision_recall_curve, f1_score, average_precision_score

In [24]:
def evaluate_metrics(y_true, y_prob, k=5, ndcg_k=10):
    """Compute ranking and classification metrics for scored candidate lists.

    Args:
        y_true: list of rows; each row holds the integer relevance labels of
            one query's candidates (0 means not relevant).
        y_prob: list of rows aligned with ``y_true``; each row holds the
            model scores of the same candidates.
        k: cutoff used for precision@k.
        ndcg_k: cutoff used for NDCG (defaults to 10, the value that was
            previously hard-coded).

    Returns:
        dict with the keys ``"precision_at_<k>"``, ``"mrr"``, ``"ndcg"``,
        ``"threshold"`` (best-F1 score threshold, rounded to 2 decimals) and
        ``"f1_score"`` (F1 obtained with that rounded threshold).
    """
    def get_rank(y_true, y_prob):
        """Return, per query, the labels reordered by descending score."""
        rs = list()
        for y_t, y_p in zip(y_true, y_prob):
            ranked = sorted(zip(y_t, y_p), key=lambda x: x[1], reverse=True)
            rs.append([t for t, _p in ranked])
        return rs

    def get_precision_at_k(rs, k):
        """Mean fraction of relevant (non-zero) labels among the top k of each row."""
        rs = [(np.asarray(r)[:k] != 0) for r in rs]
        return np.mean([np.mean(r) for r in rs])

    def mean_reciprocal_rank(rs):
        """Mean of 1/rank of the first relevant item; rows without one count as 0."""
        rs = (np.asarray(r).nonzero()[0] for r in rs)
        return np.mean([1. / (r[0] + 1) if r.size else 0. for r in rs])

    def dcg_at_k(r, k):
        """Discounted cumulative gain of the first k labels (log2 discount)."""
        # np.asfarray was removed in NumPy 2.0; asarray with dtype=float is
        # the equivalent call and works on every NumPy version.
        r = np.asarray(r, dtype=float)[:k]
        return np.sum(r / np.log2(np.arange(2, r.size + 2)))

    def ndcg_at_k(r, k):
        """DCG normalised by the DCG of the ideally ordered row (0 if that is 0)."""
        dcg_max = dcg_at_k(sorted(r, reverse=True), k)
        if not dcg_max:
            return 0.
        return dcg_at_k(r, k) / dcg_max

    def mean_ndcg_at_k(rs, k):
        return np.mean([ndcg_at_k(r, k) for r in rs])

    def flatten(list_of_lists):
        return [y for x in list_of_lists for y in x]

    def get_best_threshold(y_true, y_prob):
        """Return the score threshold with the highest F-measure, rounded to 2 decimals."""
        y_true_binary = [y != 0 for y in flatten(y_true)]
        precision, recall, thresholds = precision_recall_curve(y_true_binary, flatten(y_prob))
        best_f_measure = 0
        best_threshold = 0
        # precision/recall carry one more entry than thresholds; zip stops at
        # the shorter sequence, so only points with a threshold are considered.
        for p, r, t in zip(precision, recall, thresholds):
            if (p + r) == 0:
                continue
            f_measure = 2 * p * r / (p + r)
            if f_measure > best_f_measure:
                best_f_measure = f_measure
                best_threshold = t
        return np.round(best_threshold, 2)

    def get_f1_score(y_true, y_prob, threshold):
        """F1 of 'label is non-zero' against 'score >= threshold'."""
        return f1_score([y != 0 for y in flatten(y_true)],
                        [int(y >= threshold) for y in flatten(y_prob)])

    rs = get_rank(y_true, y_prob)
    threshold = get_best_threshold(y_true, y_prob)
    f_measure = get_f1_score(y_true, y_prob, threshold)

    return {"precision_at_{}".format(k): get_precision_at_k(rs, k),
            "mrr": mean_reciprocal_rank(rs),
            "ndcg": mean_ndcg_at_k(rs, ndcg_k),
            "threshold": threshold,
            "f1_score": f_measure}

In [25]:
import editdistance

In [29]:
# Identify the evaluated run and the restored checkpoint (epoch / global step
# are read from the model's tensors), then attach the evaluation metrics.
result = dict(
    name=model_config.name,
    model=model_config.model,
    negative_sampling=model_config.negative_sampling,
    num_negative_samples=model_config.num_negative_samples,
    epoch=infer_model.cur_epoch_tensor.eval(infer_sess),
    step=infer_model.global_step_tensor.eval(infer_sess),
)
result.update(evaluate_metrics(y_true, y_prob))
result

{'epoch': 4,
 'f1_score': 0.5084745762711865,
 'model': 'DualEncoderLSTM',
 'mrr': 0.7052962439101053,
 'name': 'new_delstm_nsrandom4echo_lr1e-3',
 'ndcg': 0.7396109296005957,
 'negative_sampling': 'random',
 'num_negative_samples': 4,
 'precision_at_5': 0.41089108910891087,
 'step': 1960000,
 'threshold': 0.49}

In [39]:
# NOTE(review): the recorded output of this cell (202) cannot come from this
# statement, which displays nothing — the cell was presumably edited after it
# was run. As written, `replies` is empty, so the later `item not in replies`
# check is true for every item; restore the code that populated it.
replies = list()


202

In [40]:
# Peek at the first line of the raw labelled data.
# NOTE(review): `t` is not defined in any visible cell — it comes from hidden
# kernel state, so this fails after Restart & Run All.
t.partition("\n")[0]

'아이스크림 좋아해요?\t2\t배고프면 뭐라도 먹어야죠\t0\t전 초코 아이스크림 좋아해요\t2\t전 간식 별로 안좋아해요\t2\t일하다가 당떨어졌을 때 간식같은거 먹으면 좋죠\t2\t어떤 간식을 제일 좋아해요?\t1\t짜장면 좋아해요?\t0\t배부를 때가 제일 행복하죠\t0\t저도 다이어트 해야하는데\t0\t갑자기 달달한 게 땡기네요\t1'

In [42]:
# Re-order the (text, label) pairs of every line by descending label.
# Each line of `t` is a flat, tab-separated sequence: text, label, text, label, ...
# NOTE(review): this rebinds `result`, which held the metrics dict in an
# earlier cell, and `t` is not defined in any visible cell.
result = list()
for line in t.split("\n"):
    stripped = line.strip()
    if not stripped:
        # A blank line (e.g. from a trailing newline) has no pairs; indexing
        # into it used to raise IndexError.
        continue
    fields = stripped.split("\t")
    # Pair even-indexed texts with odd-indexed labels for however many
    # candidates the line holds, instead of assuming exactly 10 pairs.
    pairs = list(zip(fields[0::2], fields[1::2]))
    # list.sort is stable, so pairs with equal labels keep their original order.
    pairs.sort(key=lambda pair: int(pair[1]), reverse=True)
    result.append("\t".join("\t".join(pair) for pair in pairs))

In [58]:
# Collect every text field of the sorted lines that is not a known reply.
# Fields that are exactly "0".."3" are treated as labels and skipped.
wrong = []
for line in result:
    for item in line.strip().split("\t"):
        if item in ["0", "1", "2", "3"] or item in replies:
            continue
        wrong.append([line, item])