In [3]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import one_hot

In [4]:
sent = [
    'the glass of milk',
    'the glass of juice',
    'I am a good boy',
    'the cup of tea',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good'
]

In [5]:
voc_size = 10000

In [6]:
## One hot representaton
one_hot_repr = [one_hot(words, voc_size) for words in sent]      
one_hot_repr  

[[7929, 6211, 3057, 4659],
 [7929, 6211, 3057, 8003],
 [4094, 1866, 7467, 8983, 610],
 [7929, 8822, 3057, 1500],
 [4094, 1866, 7467, 8983, 3503],
 [258, 7929, 7965, 3057, 1040],
 [7533, 1609, 349, 8983]]

In [9]:
## word embedding
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [10]:
sent_length = 8
embedded_docs = pad_sequences(one_hot_repr, padding='pre', maxlen=sent_length)
print(embedded_docs)

[[   0    0    0    0 7929 6211 3057 4659]
 [   0    0    0    0 7929 6211 3057 8003]
 [   0    0    0 4094 1866 7467 8983  610]
 [   0    0    0    0 7929 8822 3057 1500]
 [   0    0    0 4094 1866 7467 8983 3503]
 [   0    0    0  258 7929 7965 3057 1040]
 [   0    0    0    0 7533 1609  349 8983]]


In [11]:
# feature representation
dim=10

In [12]:
model = Sequential()
model.add(Embedding(voc_size, dim, input_length=sent_length))
model.compile('adam', 'mse')





In [13]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [14]:
model.predict(embedded_docs)



array([[[ 8.79391283e-03, -4.28573489e-02,  3.38099897e-04,
          2.81616338e-02,  1.45356171e-02, -3.75526436e-02,
         -1.38363242e-02,  1.24095455e-02,  1.86775215e-02,
          5.33417612e-03],
        [ 8.79391283e-03, -4.28573489e-02,  3.38099897e-04,
          2.81616338e-02,  1.45356171e-02, -3.75526436e-02,
         -1.38363242e-02,  1.24095455e-02,  1.86775215e-02,
          5.33417612e-03],
        [ 8.79391283e-03, -4.28573489e-02,  3.38099897e-04,
          2.81616338e-02,  1.45356171e-02, -3.75526436e-02,
         -1.38363242e-02,  1.24095455e-02,  1.86775215e-02,
          5.33417612e-03],
        [ 8.79391283e-03, -4.28573489e-02,  3.38099897e-04,
          2.81616338e-02,  1.45356171e-02, -3.75526436e-02,
         -1.38363242e-02,  1.24095455e-02,  1.86775215e-02,
          5.33417612e-03],
        [ 2.78846137e-02, -1.78618804e-02, -3.70424986e-03,
          4.57749702e-02, -2.41810214e-02, -4.49246876e-02,
          4.02784236e-02,  1.76750496e-03, -3.334514

In [15]:
embedded_docs[0]

array([   0,    0,    0,    0, 7929, 6211, 3057, 4659])

In [17]:
model.predict(embedded_docs[0])



array([[ 8.79391283e-03, -4.28573489e-02,  3.38099897e-04,
         2.81616338e-02,  1.45356171e-02, -3.75526436e-02,
        -1.38363242e-02,  1.24095455e-02,  1.86775215e-02,
         5.33417612e-03],
       [ 8.79391283e-03, -4.28573489e-02,  3.38099897e-04,
         2.81616338e-02,  1.45356171e-02, -3.75526436e-02,
        -1.38363242e-02,  1.24095455e-02,  1.86775215e-02,
         5.33417612e-03],
       [ 8.79391283e-03, -4.28573489e-02,  3.38099897e-04,
         2.81616338e-02,  1.45356171e-02, -3.75526436e-02,
        -1.38363242e-02,  1.24095455e-02,  1.86775215e-02,
         5.33417612e-03],
       [ 8.79391283e-03, -4.28573489e-02,  3.38099897e-04,
         2.81616338e-02,  1.45356171e-02, -3.75526436e-02,
        -1.38363242e-02,  1.24095455e-02,  1.86775215e-02,
         5.33417612e-03],
       [ 2.78846137e-02, -1.78618804e-02, -3.70424986e-03,
         4.57749702e-02, -2.41810214e-02, -4.49246876e-02,
         4.02784236e-02,  1.76750496e-03, -3.33451405e-02,
        -2.