In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
sent = [
  'the glass of milk',
  'the glass of juice',
  'the cup of tea',
  'i am a good boy',
  'i am a good developer',
  'understand the meaning of words',
  'your videos are good',
]

In [4]:
# define vocabulary size
vocab_size = 1000

In [None]:
# one-hot encode the sentences
one_hot_repr = [one_hot(words, vocab_size) for words in sent]
one_hot_repr

[[695, 618, 543, 468],
 [695, 618, 543, 205],
 [695, 384, 543, 711],
 [270, 314, 712, 288, 22],
 [270, 314, 712, 288, 404],
 [268, 695, 443, 543, 392],
 [576, 497, 802, 288]]

In [8]:
## word embedding representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential

In [10]:
sent_length = 8
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
print(embedded_docs)

[[  0   0   0   0 695 618 543 468]
 [  0   0   0   0 695 618 543 205]
 [  0   0   0   0 695 384 543 711]
 [  0   0   0 270 314 712 288  22]
 [  0   0   0 270 314 712 288 404]
 [  0   0   0 268 695 443 543 392]
 [  0   0   0   0 576 497 802 288]]


In [11]:
## feature representation
dim = 10

In [15]:
model = Sequential()
model.add(Embedding(vocab_size, dim, input_length=sent_length))
model.compile('adam', 'mse')
model.summary()



In [16]:
model.predict(embedded_docs)  # This will output the embedding vectors for the padded sequences


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 160ms/step


array([[[ 0.02486354, -0.01858795,  0.0328112 , -0.01486254,
          0.00521374, -0.00658111,  0.0487851 ,  0.02783911,
          0.01061176,  0.04645861],
        [ 0.02486354, -0.01858795,  0.0328112 , -0.01486254,
          0.00521374, -0.00658111,  0.0487851 ,  0.02783911,
          0.01061176,  0.04645861],
        [ 0.02486354, -0.01858795,  0.0328112 , -0.01486254,
          0.00521374, -0.00658111,  0.0487851 ,  0.02783911,
          0.01061176,  0.04645861],
        [ 0.02486354, -0.01858795,  0.0328112 , -0.01486254,
          0.00521374, -0.00658111,  0.0487851 ,  0.02783911,
          0.01061176,  0.04645861],
        [-0.00029913,  0.00103111, -0.02410663, -0.01112516,
         -0.03800204, -0.03704157, -0.00131283, -0.00056649,
         -0.0138671 , -0.02483751],
        [ 0.02123154,  0.04267054, -0.00036323, -0.04491589,
          0.01818753,  0.0258238 ,  0.04017247,  0.03854562,
          0.03417586, -0.00488987],
        [-0.00895194, -0.00178967, -0.00585229,  0.0

In [17]:
embedded_docs[0]

array([  0,   0,   0,   0, 695, 618, 543, 468])

In [20]:
model.predict(embedded_docs[0:1])  # This will output the embedding vectors for the padded sequences

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step


array([[[ 0.02486354, -0.01858795,  0.0328112 , -0.01486254,
          0.00521374, -0.00658111,  0.0487851 ,  0.02783911,
          0.01061176,  0.04645861],
        [ 0.02486354, -0.01858795,  0.0328112 , -0.01486254,
          0.00521374, -0.00658111,  0.0487851 ,  0.02783911,
          0.01061176,  0.04645861],
        [ 0.02486354, -0.01858795,  0.0328112 , -0.01486254,
          0.00521374, -0.00658111,  0.0487851 ,  0.02783911,
          0.01061176,  0.04645861],
        [ 0.02486354, -0.01858795,  0.0328112 , -0.01486254,
          0.00521374, -0.00658111,  0.0487851 ,  0.02783911,
          0.01061176,  0.04645861],
        [-0.00029913,  0.00103111, -0.02410663, -0.01112516,
         -0.03800204, -0.03704157, -0.00131283, -0.00056649,
         -0.0138671 , -0.02483751],
        [ 0.02123154,  0.04267054, -0.00036323, -0.04491589,
          0.01818753,  0.0258238 ,  0.04017247,  0.03854562,
          0.03417586, -0.00488987],
        [-0.00895194, -0.00178967, -0.00585229,  0.0