In [9]:
import tensorflow as tf

# Start from an empty default graph. Everything below adds ops to the
# process-wide default graph, so re-running this cell in the same kernel would
# otherwise stack duplicate ops (emb_1, input_x_1, ...) on top of the old ones
# and all of them would end up in the graph written out for tensorboard.
tf.reset_default_graph()

sequence_length = 7
vocab_size = 128
embedding_size = 5

# emb captures embeddings for the entire vocabulary
# Usually obtained through word2vec training using some corpus (example: news data)
# But for this toy example, we are randomly generating them - [128 x 5] matrix
# We are also assuming we have only 128 words in this vocab set
emb = tf.Variable(tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0), name="emb")

# Placeholders to hold a batch of sentences and their labels
# In this example, a sentence is limited to max 7 words (word ids into emb)
# Labels are one-hot rows over 2 classes
input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
input_y = tf.placeholder(tf.int32, [None, 2], name="input_y")

# Look up word embeddings from emb for each sentence
# [batch, sequence_length] word ids => [batch, sequence_length, embedding_size]
emb_input = tf.nn.embedding_lookup(emb, input_x)

# Add one more dimension at the end - channel, so that we can use conv2d later
# conv2d operator requires input to be in [batch, height, width, channel]
# In our example we have only one channel (in case of images, we may have 3 channels)
# Convert data from [batch, height, width] => [batch, height, width, channel]
# Remember - we are just adding one dimension to the matrix, so it can only have one channel
emb_input_expanded = tf.expand_dims(emb_input, -1)

# Create num_filters (2) filters for each of the heights [2, 3, 4]
# A filter with height 2 covers 2 consecutive words each time
# The width of every filter is the embedding dimension size, so a filter
# always spans whole word vectors
filter_sizes = [2, 3, 4]
num_filters = 2

# One max-pooled activation tensor per filter size, concatenated afterwards
pooled_outputs = []
for filter_size in filter_sizes:
    with tf.name_scope("conv-maxpool-%s" % filter_size):
        # conv2d kernel layout: [height, width, in_channels, out_channels]
        # The input has a single channel; each filter produces one output channel
        filter_shape = [filter_size, embedding_size, 1, num_filters]

        # Initialize each filter's weights and one bias per filter
        W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")

        # Apply filter on input [batch, sequence_length, embedding_size, 1]
        # Stride 1 moves one word at a time; VALID padding means no padding, so the
        # result is [batch, sequence_length - filter_size + 1, 1, num_filters]
        conv = tf.nn.conv2d(emb_input_expanded,
            W, strides=[1, 1, 1, 1],
            padding="VALID", name="conv")

        # Apply relu - max(0, output) - as the activation function
        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")

        # Max pool over the whole activation map: the window height equals the
        # number of positions the filter visited, leaving one value per filter
        # => [batch, 1, 1, num_filters]
        pooled = tf.nn.max_pool(h, ksize=[1, sequence_length - filter_size + 1, 1, 1],
            strides=[1, 1, 1, 1], padding='VALID', name="pool")
        pooled_outputs.append(pooled)

# Concatenate all pooled outputs along the last (channel) axis and flatten
# them to one feature row per sentence: [batch, num_filters_total]
num_filters_total = num_filters * len(filter_sizes)
h_pool = tf.concat(pooled_outputs, 3)
h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

# Keep probability for dropout. It is a placeholder with a default instead of
# a Python constant: without a feed it behaves exactly as before (0.5), but
# feed_dict={dropout_keep_prob: 1.0} switches dropout off, which is what you
# want when evaluating predictions / accuracy.
dropout_keep_prob = tf.placeholder_with_default(0.5, shape=[], name="dropout_keep_prob")
with tf.name_scope("dropout"):
    h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob)

# Final (unnormalized) scores and predictions
num_classes = 2
with tf.name_scope("output"):
    W = tf.Variable(tf.random_uniform([num_filters_total, num_classes], -1.0, 1.0), name = "W")
    b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
    # scores = h_drop * W + b, one row of class scores per sentence
    scores = tf.nn.xw_plus_b(h_drop, W, b, name="scores")
    # Index of the highest scoring class for each sentence
    predictions = tf.argmax(scores, 1, name="predictions")

# Calculate mean cross-entropy loss
with tf.name_scope("loss"):
    # input_y is an int32 placeholder while scores is floating point; cast the
    # one-hot labels explicitly so that labels and logits share a dtype
    labels = tf.cast(input_y, scores.dtype)
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=labels)
    loss = tf.reduce_mean(losses)

# Calculate accuracy: fraction of sentences whose predicted class matches
# the class marked in the one-hot label
with tf.name_scope("accuracy"):
    correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32), name="accuracy")

# Create session and initialize weight matrices
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Write graph definition to a file, so that tensorboard can read it.
# The writer updates its event file asynchronously, so flush explicitly to
# make sure the graph is on disk before tensorboard is pointed at ./cnn_text
writer = tf.summary.FileWriter("./cnn_text", graph=tf.get_default_graph())
writer.flush()

# Input setup, each row is one sentence, each column represents one word (as a word id)
# Each word's embedding is picked from the emb matrix
# Batch contains 4 sentences, each sentence is one training sample
batch_x = [
    [1, 4, 6, 8, 20, 2, 8],
    [11, 14, 16, 18, 20, 12, 18],
    [21, 24, 26, 28, 20, 22, 28],
    [31, 34, 36, 38, 20, 32, 38],
]

# Labels (one-hot encoding of 2 classes), one row per sentence in batch_x
batch_y = [
    [0, 1],
    [1, 0],
    [0, 1],
    [1, 0]
]

# Fetch the expanded embeddings for the batch.
# emb_input_expanded only depends on input_x; input_y is fed as well but is
# not needed for this fetch.
x = sess.run(emb_input_expanded, feed_dict={input_x:batch_x, input_y:batch_y})
print(x)


[[[[-0.61675262]
   [ 0.47593522]
   [ 0.63476706]
   [ 0.70122576]
   [-0.92180514]]

  [[ 0.64232993]
   [ 0.87446809]
   [-0.45738983]
   [ 0.37791157]
   [ 0.43777013]]

  [[-0.39749408]
   [ 0.50773191]
   [ 0.70843768]
   [ 0.98850799]
   [ 0.43117452]]

  [[ 0.65729213]
   [-0.64869571]
   [ 0.96383023]
   [ 0.42564988]
   [-0.39756846]]

  [[-0.90904951]
   [ 0.57586622]
   [-0.83470488]
   [-0.65258765]
   [-0.28634191]]

  [[ 0.93928337]
   [ 0.49453354]
   [-0.49489546]
   [-0.88803959]
   [-0.92131376]]

  [[ 0.65729213]
   [-0.64869571]
   [ 0.96383023]
   [ 0.42564988]
   [-0.39756846]]]


 [[[-0.26823759]
   [ 0.25583148]
   [ 0.34666491]
   [ 0.91499162]
   [-0.45772266]]

  [[ 0.76512194]
   [ 0.50044823]
   [-0.55003142]
   [ 0.99735403]
   [-0.55149746]]

  [[ 0.97698236]
   [ 0.51228523]
   [-0.8531301 ]
   [-0.28255534]
   [ 0.16338778]]

  [[ 0.8657496 ]
   [-0.9270401 ]
   [ 0.81765532]
   [ 0.78862095]
   [ 0.11223102]]

  [[-0.90904951]
   [ 0.57586622]
   [-0.

In [7]:
# Shape of the expanded embeddings fetched into x:
# (batch, sequence_length, embedding_size, channel)
x.shape

(4, 7, 5, 1)