In [3]:
import tensorflow as tf

sequence_length = 7   # every sentence is limited to 7 word ids
vocab_size = 128      # number of words in the toy vocabulary
embedding_size = 5    # dimensions of each word vector
EMB_SEED = 42         # fixed seed so the random embeddings are the same on every fresh run

# emb captures embeddings for the entire vocabulary
# Usually obtained through word2vec training using some corpus (example: news data)
# But for this toy example, we are randomly generating them - [128 x 5] matrix
# We are also assuming we have only 128 words in this vocab set
# The op-level seed makes the generated values reproducible after Restart & Run All
emb = tf.Variable(
    tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0, seed=EMB_SEED),
    name="emb",
)

# Place holder to hold batch of sentences (int32 word ids)
# In this example, sentence is limited to max 7 words; batch size is left open (None)
input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")

# Look up word embeddings from emb for each sentence (pay attention to matrix shape)
# [batch, 7] word ids => [batch, 7, 5] embeddings
emb_input = tf.nn.embedding_lookup(emb, input_x)

# Add one more dimension at end - channel, so that we can use conv2d later
# conv2d operator requires input to be in [batch, height, width, channel]
# in our example we have only one channel (in case of images, we may have 3 channels)
# Convert data from [batch, height, width] => [batch, height, width, channel]
# Remember - we are just adding one dimension to matrix, it can only have one channel
emb_input_expanded = tf.expand_dims(emb_input, -1)

# Create session and initialize weight matrices
# The session is kept open on purpose: the following cells reuse it
sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Write graph definition to a file, so that tensorboard can read it ...
writer = tf.summary.FileWriter("./cnn_text", graph=tf.get_default_graph())
# FileWriter updates the event file asynchronously; flush so the graph
# definition is on disk right away (the writer stays usable afterwards)
writer.flush()

# Toy mini-batch: one row per sentence, one column per word position.
# Each entry is a word id, i.e. a row index into the embedding matrix `emb`.
# 4 sentences in total; each one is a single training sample (labels come later).
batch_x = [
    [1, 4, 6, 8, 20, 2, 8],
    [11, 14, 16, 18, 20, 12, 18],
    [21, 24, 26, 28, 20, 22, 28],
    [31, 34, 36, 38, 20, 32, 38],
]

# Evaluate the expanded embeddings for this batch; being the last
# expression of the cell, the resulting array is displayed as its output.
sess.run(fetches=emb_input_expanded, feed_dict={input_x: batch_x})


array([[[[ 0.01870418],
         [-0.92921925],
         [-0.12055087],
         [ 0.18857121],
         [ 0.12271571]],

        [[ 0.81799889],
         [-0.49390912],
         [-0.92477608],
         [ 0.63109088],
         [ 0.47854328]],

        [[ 0.48442602],
         [-0.2362926 ],
         [-0.11595845],
         [-0.72293115],
         [ 0.52442193]],

        [[-0.46741796],
         [-0.29171753],
         [-0.6304512 ],
         [ 0.06384611],
         [-0.43557835]],

        [[-0.49989343],
         [-0.36670852],
         [ 0.36700058],
         [-0.1702528 ],
         [-0.20752215]],

        [[-0.74625754],
         [-0.47256565],
         [-0.18132567],
         [ 0.35549712],
         [-0.04972649]],

        [[-0.46741796],
         [-0.29171753],
         [-0.6304512 ],
         [ 0.06384611],
         [-0.43557835]]],


       [[[-0.56025958],
         [-0.74038267],
         [-0.10035944],
         [ 0.33942556],
         [ 0.50724554]],

        [[ 0.14959049]

In [5]:
# Word embeddings matrix (128 vocab, each word is represented by 5 dimensions)
# emb is a variable and does not depend on the input_x placeholder,
# so it can be fetched without feeding a batch
w = sess.run(emb)
w.shape

(128, 5)

In [7]:
# Just check out word embeddings of word 3 (row index 2, because index starts at 0)
# Word id 2 is also the sixth entry of batch_x[0], so the same vector
# appears as the sixth row of that sentence's embeddings
w[2]

array([-0.74625754, -0.47256565, -0.18132567,  0.35549712, -0.04972649], dtype=float32)

In [10]:
# Just check input - batch of 4 sentences, explore the first sentence
# Composed of 7 words indicated by their indices in vocabulary
# Example - this sentence may be "I like this movie very much !"
batch_x[0]


[1, 4, 6, 8, 20, 2, 8]

In [12]:
# Run the embedding lookup for the whole batch of word ids.
# Input is [4, 7] (4 sentences x 7 words); result is [4, 7, 5] (one 5-d vector per word).
we = sess.run(
    fetches=emb_input,
    feed_dict={input_x: batch_x},
)
we.shape

(4, 7, 5)

In [13]:
# Explore first sentence word embeddings
# these are word embeddings for sentence 0 => "I like this movie very much !" => [1, 4, 6, 8, 20, 2, 8]
# Result is [7, 5]: one row per word. The 4th and 7th rows are identical
# because both positions hold word id 8
we[0]

array([[ 0.01870418, -0.92921925, -0.12055087,  0.18857121,  0.12271571],
       [ 0.81799889, -0.49390912, -0.92477608,  0.63109088,  0.47854328],
       [ 0.48442602, -0.2362926 , -0.11595845, -0.72293115,  0.52442193],
       [-0.46741796, -0.29171753, -0.6304512 ,  0.06384611, -0.43557835],
       [-0.49989343, -0.36670852,  0.36700058, -0.1702528 , -0.20752215],
       [-0.74625754, -0.47256565, -0.18132567,  0.35549712, -0.04972649],
       [-0.46741796, -0.29171753, -0.6304512 ,  0.06384611, -0.43557835]], dtype=float32)

In [14]:
# Last step: evaluate the tensor with the trailing channel axis.
# [4, 7, 5] becomes [4, 7, 5, 1] - same values, just one extra dimension.
we_exp = sess.run(
    fetches=emb_input_expanded,
    feed_dict={input_x: batch_x},
)
we_exp.shape

(4, 7, 5, 1)

In [15]:
# First sentence after expand_dims: [7, 5, 1] - the same values as we[0],
# with every embedding component wrapped in its own single-channel axis
we_exp[0]

array([[[ 0.01870418],
        [-0.92921925],
        [-0.12055087],
        [ 0.18857121],
        [ 0.12271571]],

       [[ 0.81799889],
        [-0.49390912],
        [-0.92477608],
        [ 0.63109088],
        [ 0.47854328]],

       [[ 0.48442602],
        [-0.2362926 ],
        [-0.11595845],
        [-0.72293115],
        [ 0.52442193]],

       [[-0.46741796],
        [-0.29171753],
        [-0.6304512 ],
        [ 0.06384611],
        [-0.43557835]],

       [[-0.49989343],
        [-0.36670852],
        [ 0.36700058],
        [-0.1702528 ],
        [-0.20752215]],

       [[-0.74625754],
        [-0.47256565],
        [-0.18132567],
        [ 0.35549712],
        [-0.04972649]],

       [[-0.46741796],
        [-0.29171753],
        [-0.6304512 ],
        [ 0.06384611],
        [-0.43557835]]], dtype=float32)