In [1]:
'''
  code by Minho Ryu @bzantium
  Reference : https://github.com/ioatr/textcnn
'''
import tensorflow as tf
import numpy as np

# Start from a fresh default graph so that re-running this cell does not stack
# new ops/variables on top of the ones created by a previous execution.
tf.reset_default_graph()

# Text-CNN Parameter
embedding_size = 2  # dimensionality of each word embedding vector
sequence_length = 3  # every sentence must contain exactly this many words
num_classes = 2  # 0 or 1
filter_sizes = [2, 2, 2]  # n-gram window height of each convolution branch
num_filters = 3  # output channels produced by each convolution branch

# 3 words sentences (=sequence_length is 3)
sentences = ["i love you","he loves me", "she likes baseball", "i hate you","sorry for that", "this is awful"]
labels = [1,1,1,0,0,0] # 1 is good, 0 is not good.

# Fail early on malformed toy data instead of deep inside the TensorFlow feed.
assert len(sentences) == len(labels), "every sentence needs exactly one label"
assert all(len(sen.split()) == sequence_length for sen in sentences), \
    "every sentence must contain sequence_length words"

# Sort the vocabulary: iterating a bare set() yields a different order on every
# kernel start (string hashing is randomised), which would silently change the
# word -> id mapping between runs.
word_list = sorted(set(" ".join(sentences).split()))
word_dict = {w: i for i, w in enumerate(word_list)}
vocab_size = len(word_dict)

# One int array of word ids per sentence.
inputs = [np.asarray([word_dict[n] for n in sen.split()]) for sen in sentences]

# ONE-HOT rows, as expected by the softmax cross-entropy loss.
outputs = [np.eye(num_classes)[out] for out in labels]

# Model
class TextCNN(object):
    """Tiny Text-CNN sentence classifier (TensorFlow 1.x graph mode).

    Pipeline: embedding lookup -> one convolution + ReLU + max-pooling branch
    per entry of ``filter_sizes`` -> concatenation -> dense softmax layer.

    Args:
        sess: ``tf.Session`` used to initialise the variables and to execute
            ``train`` / ``predict``.
        vocab_size: number of distinct word ids.
        sequence_length: number of tokens in every input sentence.
        embedding_size: dimensionality of each word embedding.
        filter_sizes: convolution window heights (in tokens); one branch is
            built per entry, duplicates included.
        num_filters: number of output channels of each branch.
        num_classes: number of target classes.
    """

    def __init__(self, sess, vocab_size, sequence_length, embedding_size, 
                 filter_sizes, num_filters, num_classes):
        self.sess = sess
        self.vocab_size = vocab_size
        self.sequence_length = sequence_length
        self.embedding_size = embedding_size
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.num_classes = num_classes
        self._build_model()

    def _build_model(self):
        """Create placeholders, network, loss and train op, then initialise all variables."""
        # Placeholders for input, output
        with tf.variable_scope("placeholder"):
            self.X = tf.placeholder(tf.int32, [None, self.sequence_length])
            # Label distributions are float so that non-integer (soft) targets
            # are not truncated; integer one-hot feeds are converted on feed.
            self.Y = tf.placeholder(tf.float32, [None, self.num_classes])
            # Not consumed by the graph below; kept so the attribute stays available.
            self.embbeding_placeholder = tf.placeholder(tf.float32, (self.vocab_size, self.embedding_size))

        # Embedding layer
        with tf.variable_scope("embedding", reuse=tf.AUTO_REUSE):
            W = tf.get_variable("embedding_W", dtype=tf.float32, initializer=tf.random_uniform([self.vocab_size, self.embedding_size], -1.0, 1.0))
            embedded_chars = tf.nn.embedding_lookup(W, self.X)
            # add channel(=1): [batch_size, sequence_length, embedding_size, 1]
            embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

        # Create a convolution + maxpool layer for each filter size
        pooled_outputs = []
        for branch, filter_size in enumerate(self.filter_sizes):
            # The branch index keeps scope names unique. Naming the scope after
            # the filter size alone, combined with AUTO_REUSE, made branches
            # with equal sizes share a single W/b (i.e. be the same filter bank).
            with tf.variable_scope("conv-maxpool-%d-%s" % (branch, filter_size), reuse=tf.AUTO_REUSE):
                # Convolution Layer
                filter_shape = (filter_size, self.embedding_size, 1, self.num_filters)
                W = tf.get_variable("W", dtype=tf.float32, initializer=tf.truncated_normal(filter_shape, stddev=0.1))
                b = tf.get_variable("b", dtype=tf.float32, initializer=tf.constant(0.1, shape=(self.num_filters,)))

                conv = tf.nn.conv2d(embedded_chars_expanded, # [batch_size, sequence_length, embedding_size, 1]
                                    W,              # [filter_size(n-gram window), embedding_size, 1, num_filters]
                                    strides=[1, 1, 1, 1],
                                    padding='VALID')
                # Apply nonlinearity
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name='relu')
                # Max-pool over every window position of this branch. Uses the
                # instance's sequence length, not a module-level global.
                pooled = tf.nn.max_pool(h,
                                        ksize=[1, self.sequence_length - filter_size + 1, 1, 1], # [batch, height, width, channel]
                                        strides=[1, 1, 1, 1],
                                        padding='VALID')
                pooled_outputs.append(pooled) # [batch_size, 1, 1, num_filters]

        # Combine all the pooled features along the channel axis (axis 3).
        # The axis must not be derived from num_filters: that only worked by
        # coincidence when num_filters happened to equal 3.
        num_filters_total = self.num_filters * len(self.filter_sizes)
        h_pool = tf.concat(pooled_outputs, axis=3) # [batch_size, 1, 1, num_filters_total]
        h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total]) # [batch_size, num_filters_total]

        # Final (unnormalized) scores and predictions
        with tf.variable_scope("output", reuse=tf.AUTO_REUSE):
            W = tf.get_variable('output_W', shape=[num_filters_total, self.num_classes], 
                                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[self.num_classes]))
            logits = tf.nn.xw_plus_b(h_pool_flat, W, b)
            self.cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=self.Y))
            self.optimizer = tf.train.AdamOptimizer(0.001).minimize(self.cost)
            hypothesis = tf.nn.softmax(logits)
            self.predictions = tf.argmax(hypothesis, 1)

        # Run the initializer through the session that was handed in rather
        # than relying on an ambient default session being active.
        self.sess.run(tf.global_variables_initializer())

    def train(self, inputs, labels):
        """Run one optimisation step on a batch.

        Args:
            inputs: batch of word-id sequences, fed to ``self.X``.
            labels: batch of one-hot label rows, fed to ``self.Y``.

        Returns:
            The result of ``sess.run`` for ``[cost, optimizer]``: a two-element
            list whose first item is the batch loss.
        """
        return self.sess.run([self.cost, self.optimizer], feed_dict={self.X: inputs, self.Y: labels})

    def predict(self, inputs):
        """Predict class indices for a batch of word-id sequences.

        Returns:
            A one-element list wrapping the array of predicted class indices
            (callers index ``[0]`` to get the array itself).
        """
        return self.sess.run([self.predictions], feed_dict={self.X: inputs})
      
# Training
run_config = tf.ConfigProto()
run_config.gpu_options.allow_growth=True
with tf.Session(config=run_config) as sess:
    model = TextCNN(sess, vocab_size, sequence_length, embedding_size, filter_sizes, num_filters, num_classes)
    for epoch in range(5000):
        loss, _ = model.train(inputs, outputs)
        if (epoch + 1)%1000 == 0:
            print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.6f}'.format(loss))
    
    # Test
    test_text = ['sorry hate you', 'you love me']
    tests = []
    for text in test_text:
        tests.append(np.asarray([word_dict[n] for n in text.split()]))
    result = model.predict(tests)[0]
    
for i, text in enumerate(test_text):
  if result[i] == 0:
      print("\'" + text + "\'", "is bad :(")
  else:
      print("\'"+ text + "\'", "is good :)")

Instructions for updating:
Colocations handled automatically by placer.

For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Use tf.cast instead.
Epoch: 1000 cost = 0.002293
Epoch: 2000 cost = 0.000452
Epoch: 3000 cost = 0.000163
Epoch: 4000 cost = 0.000073
Epoch: 5000 cost = 0.000036
'sorry hate you' is bad :(
'you love me' is good :)
