In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: seven short sentences used as input for the rest of the notebook.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

In [4]:
# Echo the corpus so the cell output shows exactly which sentences get encoded.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [5]:
# Vocabulary size: passed to one_hot() as the index range and to Embedding()
# as the number of rows in the lookup table.
voc_size = 10_000

In [6]:
# Encode every sentence as a list of integer word indices (one index per word).
# Despite the helper's name, the result is an index encoding rather than a
# literal one-hot vector; the same word always receives the same index within
# a run.
# NOTE(review): one_hot() is hash-based, so distinct words may collide and the
# exact indices may differ between interpreter sessions -- confirm against the
# Keras docs before relying on specific values.
one_hot_repr = [one_hot(sentence, voc_size) for sentence in sent]
one_hot_repr


[[1132, 9945, 1222, 250],
 [1132, 9945, 1222, 7919],
 [1132, 256, 1222, 8042],
 [4578, 5857, 9417, 4359, 2993],
 [4578, 5857, 9417, 4359, 3911],
 [7950, 1132, 4880, 1222, 2664],
 [1362, 881, 5397, 4359]]

In [10]:
# word embedding Representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences # to set up max sentence length
from tensorflow.keras.models import Sequential

In [11]:
import numpy as np

In [12]:
# Common length (in tokens) that every encoded sentence is padded to.
sent_length = 8
# 'pre' padding inserts zeros at the front of each index list, turning the
# ragged list of lists into one rectangular integer array.
embedded_docs = pad_sequences(one_hot_repr, maxlen=sent_length, padding='pre')
print(embedded_docs)

[[   0    0    0    0 1132 9945 1222  250]
 [   0    0    0    0 1132 9945 1222 7919]
 [   0    0    0    0 1132  256 1222 8042]
 [   0    0    0 4578 5857 9417 4359 2993]
 [   0    0    0 4578 5857 9417 4359 3911]
 [   0    0    0 7950 1132 4880 1222 2664]
 [   0    0    0    0 1362  881 5397 4359]]


In [19]:
# Number of features in each word vector (second argument of Embedding below)
dim = 10


In [20]:
# Single-layer model: an Embedding lookup table with voc_size rows and dim
# columns, mapping every integer word index to a dim-dimensional vector.
model = Sequential()
# `input_length` is deliberately not passed: it is deprecated in current Keras
# releases (a warning is emitted and the value is not needed by the layer).
model.add(Embedding(voc_size, dim))
# Build explicitly for batches of sent_length indices so that model.summary()
# can report output shapes and parameter counts before the first predict().
model.build(input_shape=(None, sent_length))
# Optimizer and loss are placeholders; no training is run in the visible cells.
model.compile('adam', 'mse')

In [21]:
# Print the layer-by-layer summary of the model.
model.summary()

In [22]:
# Run the padded index matrix through the embedding layer. No fit() call
# appears in the visible cells, so the vectors come from the layer's initial
# weights. The padding positions all hold index 0, which is why the leading
# vectors of the first sentence's output are identical to each other.
model.predict(embedded_docs)

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 196ms/step


array([[[ 8.34022835e-03,  2.46273540e-02, -6.12626225e-03,
          2.93953344e-03,  1.80278458e-02, -5.16026095e-03,
         -3.59427817e-02,  1.51626207e-02,  2.80247368e-02,
          1.11686066e-03],
        [ 8.34022835e-03,  2.46273540e-02, -6.12626225e-03,
          2.93953344e-03,  1.80278458e-02, -5.16026095e-03,
         -3.59427817e-02,  1.51626207e-02,  2.80247368e-02,
          1.11686066e-03],
        [ 8.34022835e-03,  2.46273540e-02, -6.12626225e-03,
          2.93953344e-03,  1.80278458e-02, -5.16026095e-03,
         -3.59427817e-02,  1.51626207e-02,  2.80247368e-02,
          1.11686066e-03],
        [ 8.34022835e-03,  2.46273540e-02, -6.12626225e-03,
          2.93953344e-03,  1.80278458e-02, -5.16026095e-03,
         -3.59427817e-02,  1.51626207e-02,  2.80247368e-02,
          1.11686066e-03],
        [-1.70725696e-02, -4.24411073e-02,  2.24483944e-02,
         -1.11752637e-02, -4.16289791e-02, -4.37082425e-02,
         -4.29628156e-02, -2.77883895e-02,  1.871973

In [23]:
# Padded index sequence of the first sentence, for comparison with the
# embedding vectors printed by the predict() cell.
embedded_docs[0]

array([   0,    0,    0,    0, 1132, 9945, 1222,  250], dtype=int32)