In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
### Toy corpus: six short sentences that the rest of the notebook encodes
sent = [
    "the glass of milk",
    "the glass of juice",
    "I am a good boy",
    "I am a good developer",
    "understanding the meaning of words",
    "your videos are good",
]

In [3]:
# Echo the corpus so its contents are visible before encoding.
sent

['the glass of milk',
 'the glass of juice',
 'I am a good boy',
 'I am a good developer',
 'understanding the meaning of words',
 'your videos are good']

In [4]:
### Vocabulary size: upper bound passed to one_hot and used as the Embedding input dimension
voc_size = 10_000

In [7]:
### One Hot Representation
# Encode every sentence as a list of integer word indices, one index per word.
# In the run recorded below, 'the' was assigned index 7774 out of the 10000-slot vocabulary.
# NOTE(review): Keras documents one_hot as hash-based, so the concrete indices
# may differ between runs or environments, and distinct words can collide — confirm.
one_hot_repr = [one_hot(sentence, voc_size) for sentence in sent]
one_hot_repr

[[7774, 8722, 5157, 2315],
 [7774, 8722, 5157, 1162],
 [4921, 2106, 5588, 4834, 1250],
 [4921, 2106, 5588, 4834, 8129],
 [2276, 7774, 2969, 5157, 6207],
 [1446, 2001, 3794, 4834]]

In [9]:
### word Embedding Representation
import numpy as np
from tensorflow.keras.layers import Embedding, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import pad_sequences

In [10]:
### Pad every encoded sentence to one common length so the batch is rectangular
sent_len = 8
# 'pre' padding puts the filler zeros in front of the word indices (see the output below).
embeded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_len,
    padding='pre',
)
embeded_docs

array([[   0,    0,    0,    0, 7774, 8722, 5157, 2315],
       [   0,    0,    0,    0, 7774, 8722, 5157, 1162],
       [   0,    0,    0, 4921, 2106, 5588, 4834, 1250],
       [   0,    0,    0, 4921, 2106, 5588, 4834, 8129],
       [   0,    0,    0, 2276, 7774, 2969, 5157, 6207],
       [   0,    0,    0,    0, 1446, 2001, 3794, 4834]])

In [11]:
### Feature representation
# Each word index is mapped to a dense vector of `dim` floats.
dim = 10

# Declare the input shape with an explicit Input layer instead of
# Embedding(input_length=...). Keras 3 deprecates `input_length` and ignores it,
# which leaves the model unbuilt, so summary() cannot report the output shape or
# the parameter count. An Input layer builds the model on both Keras 2 and Keras 3.
model = Sequential()
model.add(Input(shape=(sent_len,), dtype='int32'))
model.add(Embedding(voc_size, dim))

# Only predict() is called in the cells shown here; compile() is kept so the
# model stays ready for training with the same optimizer and loss as before.
model.compile('adam', 'mse')
model.summary()



Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [15]:
# Look up the embedding vector for every token of every padded sentence.
# Per the model summary, each sentence comes back as an 8 x 10 matrix (sent_len x dim).
# All padding positions carry index 0 and therefore share one vector, which is
# why the leading rows of a sample in the output are identical.
# NOTE(review): no training step is visible in this section, so these are
# presumably the layer's initial weights and will vary between runs — confirm.
model.predict(embeded_docs)



array([[[ 2.0877156e-02, -3.8504519e-02,  4.7782604e-02,  4.3275245e-03,
         -4.5035709e-02,  2.1154750e-02, -2.9338693e-02, -3.8576484e-02,
          4.6280947e-02, -2.6170064e-02],
        [ 2.0877156e-02, -3.8504519e-02,  4.7782604e-02,  4.3275245e-03,
         -4.5035709e-02,  2.1154750e-02, -2.9338693e-02, -3.8576484e-02,
          4.6280947e-02, -2.6170064e-02],
        [ 2.0877156e-02, -3.8504519e-02,  4.7782604e-02,  4.3275245e-03,
         -4.5035709e-02,  2.1154750e-02, -2.9338693e-02, -3.8576484e-02,
          4.6280947e-02, -2.6170064e-02],
        [ 2.0877156e-02, -3.8504519e-02,  4.7782604e-02,  4.3275245e-03,
         -4.5035709e-02,  2.1154750e-02, -2.9338693e-02, -3.8576484e-02,
          4.6280947e-02, -2.6170064e-02],
        [ 2.1385755e-02, -1.9984795e-02, -3.3784844e-02,  1.5103925e-02,
          2.0784888e-02,  3.4293022e-02, -4.1192658e-03, -5.4164529e-03,
          5.7292953e-03,  2.8169099e-02],
        [-4.1613318e-02, -2.7815355e-02,  7.0903674e-03, -3.