In [23]:
import os
import re
import json
import tensorflow as tf
from tensorflow.python.platform import flags
import numpy as np
from model import Model
import matplotlib.pyplot as plt
import argparse
from data_helpers import split_sentence, clean_str, get_vocab

plt.rcParams['figure.figsize'] = (16, 12)
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

%matplotlib inline
%config InlineBackend.figure_format = 'retina'


In [2]:
# Reset the global tf.app.flags state by installing a fresh flag container and
# a fresh argparse parser (presumably so the DEFINE_* calls below do not clash
# with flags already registered in this kernel).
# NOTE(review): `_FlagValues` and `_global_parser` are private TensorFlow
# internals — confirm they exist in the TensorFlow version this notebook targets.
tf.app.flags.FLAGS = flags._FlagValues()
tf.app.flags._global_parser = argparse.ArgumentParser()

# From here on `flags` is rebound to tf.app.flags, shadowing the
# `tensorflow.python.platform.flags` module imported at the top of the notebook.
# NOTE(review): because of this rebinding, re-running this cell resolves
# `flags._FlagValues` against tf.app.flags instead — verify that still works.
flags = tf.app.flags
flags.DEFINE_integer('batch_size', 1, 'Batch size')
flags.DEFINE_integer('epochs', 100, 'epochs')
flags.DEFINE_integer('rnn_size', 300, 'RNN unit size')
flags.DEFINE_integer('word_attention_size', 300, 'Word level attention unit size')
flags.DEFINE_integer('sent_attention_size', 300, 'Sentence level attention unit size')
flags.DEFINE_integer('char_embedding_size', 300, 'Embedding dimension')
flags.DEFINE_string('checkpoint_dir', 'checkpoint', 'Directory name to save the checkpoints [checkpoint]')
flags.DEFINE_integer('vocab_size', 6790, 'vocabulary size')
flags.DEFINE_float('keep_prob', 0.5, 'Dropout keep prob')
flags.DEFINE_integer('document_size', 30, 'document size')
flags.DEFINE_integer('sentence_size', 50, 'sentence size')
flags.DEFINE_float('grad_clip', 5.0, 'grad clip')
FLAGS = flags.FLAGS

# Path of the checkpoint that is restored later in the notebook.
checkpoint_path = os.path.join(FLAGS.checkpoint_dir, 'model.ckpt')

# load pre-trained char embedding
char_emb = np.load('./data/emb.npy')

# Build the inference graph; the model is constructed under the 'model'
# variable scope.
# NOTE(review): presumably this scope name must match the one used when the
# checkpoint was written — confirm against the training script.
with tf.variable_scope('model'):
    test_model = Model(FLAGS)


In [119]:
# Two sample documents used as demo input for the model below:
# `pos_doc` is the positive example and `neg_doc` the negative one.
pos_doc = """作者的觀點獨特，語言犀利，深刻的總結了男人與女人之間的是非恩怨，把男人與女人在生活中積累的宿怨一語道破---原來非本質問題，而是雙方本能。此書不足之處乃後面章節雷同，越看越有寡味之感，但是還是要感謝作者帶我們遊覽一次男人、女人的本能世界！呵呵，建議讀者可以看一看，有則改之，無則加勉！！"""
neg_doc = """書太小了 拿到手裡很是失望本以為會多麼精緻~~~真的是適合小孩看啊可很小的孩子的又看不懂還不是大人給小孩子講所以應該精緻一點才好價錢還是不太值啊"""

Documents
=====

### Positive:
作者的觀點獨特，語言犀利，深刻的總結了男人與女人之間的是非恩怨，把男人與女人在生活中積累的宿怨一語道破---原來非本質問題，而是雙方本能。此書不足之處乃後面章節雷同，越看越有寡味之感，但是還是要感謝作者帶我們遊覽一次男人、女人的本能世界！呵呵，建議讀者可以看一看，有則改之，無則加勉！！

### Negative:
書太小了 拿到手裡很是失望本以為會多麼精緻~~~真的是適合小孩看啊可很小的孩子的又看不懂還不是大人給小孩子講所以應該精緻一點才好價錢還是不太值啊


In [120]:
# Load the vocabulary; only the first element returned by get_vocab is used here
# (vectorize looks tokens up in it with `vocab.get`).
# NOTE(review): assigning to `_` also overwrites IPython's "last output" variable.
vocab, _ = get_vocab('./data/vocab.pkl')
# Fixed input dimensions, taken from the flags the model was built with.
max_doc_len = FLAGS.document_size
max_sent_len = FLAGS.sentence_size

def vectorize(doc):
    """Convert a raw document string into fixed-size, padded model inputs.

    The document is cleaned with ``clean_str`` and split with
    ``split_sentence``. At most ``max_doc_len`` sentences and at most
    ``max_sent_len`` tokens per sentence are encoded; anything beyond those
    limits is dropped from the encoded data.

    Args:
        doc: The raw document text.

    Returns:
        A 4-tuple ``(data, document_length, sentence_lengths, doc_sents)``:

        * ``data`` -- flat int64 array of length ``max_doc_len * max_sent_len``
          holding vocabulary ids, pre-filled with 1 (PAD).
        * ``document_length`` -- number of sentences actually encoded, i.e.
          clipped to ``max_doc_len`` so it never exceeds what ``data`` holds.
        * ``sentence_lengths`` -- int64 array of length ``max_doc_len`` with
          the (clipped) token count of each encoded sentence, 0 elsewhere.
        * ``doc_sents`` -- the full, unclipped sequence of split sentences.
    """
    doc_sents = split_sentence(clean_str(doc))
    # Clip once and use the clipped value everywhere, including the return
    # value: reporting more sentences than `data` contains would hand the
    # model a document length larger than the input tensor.
    doc_len = min(len(doc_sents), max_doc_len)
    sentence_lengths = np.zeros((max_doc_len,), dtype=np.int64)
    data = np.ones((max_doc_len * max_sent_len,), dtype=np.int64)  # 1 = PAD

    for j in range(doc_len):
        sent = doc_sents[j]
        sent_len = min(len(sent), max_sent_len)
        sentence_lengths[j] = sent_len
        # Sentence j occupies the j-th slot of width max_sent_len in the flat array.
        start = j * max_sent_len
        # Tokens missing from the vocabulary map to id 0 (presumably UNK -- confirm).
        data[start:start + sent_len] = [vocab.get(sent[k], 0) for k in range(sent_len)]

    return data, doc_len, sentence_lengths, doc_sents

# Vectorize both sample documents; each result is the 4-tuple returned by vectorize.
pos_data = vectorize(pos_doc)
neg_data = vectorize(neg_doc)

# Show how each document was split: one sentence per line (last tuple element),
# followed by a separator line.
for vectorized in (pos_data, neg_data):
    sentences = vectorized[-1]
    print('\n'.join(sentences), '\n')

作者的觀點獨特
語言犀利
深刻的總結了男人與女人之間的是非恩怨
把男人與女人在生活中積累的宿怨一語道破
原來非本質問題
而是雙方本能
此書不足之處乃後面章節雷同
越看越有寡味之感
但是還是要感謝作者帶我們遊覽一次男人
女人的本能世界
呵呵
建議讀者可以看一看
有則改之
無則加勉 

書太小了
拿到手裡很是失望本以為會多麼精緻
真的是適合小孩看啊可很小的孩子的又看不懂還不是大人給小孩子講所以應該精緻一點才好價錢還是不太值啊 



In [121]:
saver = tf.train.Saver()

with tf.Session() as sess:
    # Load the trained weights from the checkpoint into this session.
    saver.restore(sess, checkpoint_path)

    # NOTE(review): the fetch list is empty, so this call evaluates nothing and
    # the fed embedding does not persist beyond it. It looks like a no-op --
    # confirm whether the embedding is expected to come from the checkpoint.
    sess.run([], feed_dict={test_model.embedding: char_emb})

    def forward(data, document_length, sentence_lengths):
        """Run one vectorized document through the model in inference mode.

        Args:
            data: Flat id array; reshaped to a batch of one document of
                FLAGS.document_size x FLAGS.sentence_size.
            document_length: Sentence count of the document.
            sentence_lengths: Per-sentence token counts.

        Returns:
            A pair ``(prediction, attention)``: the first element of the
            model's ``pred`` output and the full ``char_attentions`` output.
        """
        batch_shape = (1, FLAGS.document_size, FLAGS.sentence_size)
        feed = {
            test_model.inputs: data.reshape(batch_shape),
            test_model.sentence_lengths: [sentence_lengths],
            test_model.document_lengths: [document_length],
            test_model.is_training: False,
        }
        fetches = [test_model.pred, test_model.char_attentions]
        pred, attention = sess.run(fetches, feed_dict=feed)
        return pred[0], attention

    # The first three entries of each tuple are exactly forward's arguments.
    pos_result, pos_attention = forward(*pos_data[:3])
    neg_result, neg_attention = forward(*neg_data[:3])

print('pos_doc: ', pos_result, 'neg_doc:', neg_result)

INFO:tensorflow:Restoring parameters from checkpoint/model.ckpt


INFO:tensorflow:Restoring parameters from checkpoint/model.ckpt


pos_doc:  1 neg_doc: 0


In [122]:
from IPython.display import Javascript

# Hand the attention weights and the split sentences to the browser by
# assigning them to globals on `window`; the %%javascript cell below reads them.
# Every value is serialized with json.dumps so the emitted text is always a
# valid JavaScript literal. Interpolating the Python list directly would emit
# its Python repr, which is valid JS only by coincidence for simple strings.
Javascript("""
           window.pos_attention={};
           window.neg_attention={};
           window.pos_doc={};
           window.neg_doc={};
           """.format(json.dumps(pos_attention.tolist()),
                      json.dumps(neg_attention.tolist()),
                      json.dumps(list(pos_data[-1])),
                      json.dumps(list(neg_data[-1]))))


<IPython.core.display.Javascript object>

In [123]:
%%javascript

/**
 * Build an HTML string that shows each sentence of a document with a red
 * background per character, whose opacity follows the attention weight.
 *
 * @param {string[]} doc - Sentences of the document, one string per sentence.
 * @param {number[][]} attention - Attention weights indexed [sentence][character];
 *     rows and columns beyond the actual sentences/characters are ignored.
 * @returns {string} One <p> per sentence, joined by newlines; '' when there
 *     is nothing to show.
 */
function visualizer (doc, attention) {
    // Characters are inserted into markup, so HTML-special characters must be
    // escaped or a '<' / '&' in the text would corrupt the rendered output.
    const escapeHtml = (text) => String(text)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&#39;');

    const paragraphs = attention
        // Keep only the rows that correspond to real sentences (the rest is padding).
        .slice(0, doc.length)
        .map((row, i) => {
            const sent = doc[i];
            const spans = row
                // Keep only the columns that correspond to real characters.
                .slice(0, sent.length)
                .map((col, j) => {
                    // Scale the weight by 10 so small attention values are visible as alpha.
                    const weight = col * 10;
                    return '<span style="background-color: rgba(255, 50, 50, ' + weight + ');">' + escapeHtml(sent[j]) + '</span>';
                })
                .join('');
            return '<p>' + spans + '</p>';
        });

    return paragraphs.join('\n');
}

// Positive document: floated left at half width.
const positiveHtml = [
    '<div style="float:left; width:50%">',
    '<h1>Positive Document with attention</h1>',
    visualizer(pos_doc, pos_attention),
    '</div>'
].join('');
element.append(positiveHtml);

// Negative document: plain block appended after the positive one.
const negativeHtml = [
    '<div>',
    '<h1>Negative Document with attention</h1>',
    visualizer(neg_doc, neg_attention),
    '</div>'
].join('');
element.append(negativeHtml);

<IPython.core.display.Javascript object>