In [3]:
from tensorflow.keras.preprocessing.text import one_hot

In [4]:
# Toy corpus: seven short sentences used to demonstrate word embeddings.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

In [14]:
# Size of the hashing space for word indices; also the number of rows
# in the embedding table built later in the notebook.
vocab_size = 10_000

In [6]:
# Encode each sentence as a list of integer word indices by hashing every
# word into the `vocab_size`-sized index space.
# The size constant defined in this notebook is `vocab_size`; the previous
# `voc_size` name was never assigned and raised NameError on a fresh kernel.
# NOTE(review): Keras documents the default hash as unstable across Python
# processes, so the exact indices may differ between runs — confirm if
# reproducible indices are needed.
onehot_repr = [one_hot(sentence, vocab_size) for sentence in sent]
print(onehot_repr)

[[454, 1559, 546, 6867], [454, 1559, 546, 5622], [454, 4149, 546, 7203], [5557, 4326, 5232, 4796, 1017], [5557, 4326, 5232, 4796, 7137], [2376, 454, 9298, 546, 7397], [4280, 5544, 7089, 4796]]


In [10]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [12]:
# Every encoded sentence is brought to this fixed number of token positions.
sent_len = 8

# 'pre' padding puts the filler zeros in front of the real word indices,
# so each row ends with the sentence's own tokens.
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_len,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0  454 1559  546 6867]
 [   0    0    0    0  454 1559  546 5622]
 [   0    0    0    0  454 4149  546 7203]
 [   0    0    0 5557 4326 5232 4796 1017]
 [   0    0    0 5557 4326 5232 4796 7137]
 [   0    0    0 2376  454 9298  546 7397]
 [   0    0    0    0 4280 5544 7089 4796]]


In [13]:
# Length of the dense vector that each word index is mapped to.
dim: int = 15

In [15]:
# Single-layer model: an embedding table with `vocab_size` rows and `dim`
# columns, applied to sequences of `sent_len` word indices.
embedding_layer = Embedding(
    input_dim=vocab_size,
    output_dim=dim,
    input_length=sent_len,
)
model = Sequential([embedding_layer])

# Compiled so the model is ready to use; no training happens in the cells shown.
model.compile(loss='mse', optimizer='adam')

In [16]:
# Print the layer table and parameter counts of the embedding-only model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 15)             150000    
                                                                 
Total params: 150,000
Trainable params: 150,000
Non-trainable params: 0
_________________________________________________________________


In [17]:
# Look up the embedding vector for every token position of every padded sentence.
# No fit() call appears in the visible cells, so these are presumably the
# initial (untrained) embedding weights — confirm.
model.predict(embedded_docs)



array([[[ 0.03849835,  0.03412076, -0.04565848, -0.02517151,
          0.01087992,  0.02263323, -0.04074991,  0.00373969,
         -0.04028618, -0.00808368,  0.03318575,  0.03856674,
          0.04419627,  0.04064437, -0.04143589],
        [ 0.03849835,  0.03412076, -0.04565848, -0.02517151,
          0.01087992,  0.02263323, -0.04074991,  0.00373969,
         -0.04028618, -0.00808368,  0.03318575,  0.03856674,
          0.04419627,  0.04064437, -0.04143589],
        [ 0.03849835,  0.03412076, -0.04565848, -0.02517151,
          0.01087992,  0.02263323, -0.04074991,  0.00373969,
         -0.04028618, -0.00808368,  0.03318575,  0.03856674,
          0.04419627,  0.04064437, -0.04143589],
        [ 0.03849835,  0.03412076, -0.04565848, -0.02517151,
          0.01087992,  0.02263323, -0.04074991,  0.00373969,
         -0.04028618, -0.00808368,  0.03318575,  0.03856674,
          0.04419627,  0.04064437, -0.04143589],
        [-0.04101761, -0.00219955,  0.00667181,  0.03617293,
         -0

In [18]:
# Inspect the first padded sentence (row 0 of the padded index matrix).
embedded_docs[0]

array([   0,    0,    0,    0,  454, 1559,  546, 6867])

In [20]:
# Each word in the sentence gets converted into a vector with `dim` (15) dimensions.
# The model was declared with input_length=sent_len, so it expects a 2-D batch
# of shape (n_sentences, sent_len). Slicing with [:1] keeps the batch axis
# (shape (1, sent_len)) instead of handing predict() a bare 1-D row, which
# Keras would interpret as sent_len separate length-1 samples. Indexing the
# result with [0] then yields the (sent_len, dim) matrix for this one sentence.
model.predict(embedded_docs[:1])[0]



array([[ 0.03849835,  0.03412076, -0.04565848, -0.02517151,  0.01087992,
         0.02263323, -0.04074991,  0.00373969, -0.04028618, -0.00808368,
         0.03318575,  0.03856674,  0.04419627,  0.04064437, -0.04143589],
       [ 0.03849835,  0.03412076, -0.04565848, -0.02517151,  0.01087992,
         0.02263323, -0.04074991,  0.00373969, -0.04028618, -0.00808368,
         0.03318575,  0.03856674,  0.04419627,  0.04064437, -0.04143589],
       [ 0.03849835,  0.03412076, -0.04565848, -0.02517151,  0.01087992,
         0.02263323, -0.04074991,  0.00373969, -0.04028618, -0.00808368,
         0.03318575,  0.03856674,  0.04419627,  0.04064437, -0.04143589],
       [ 0.03849835,  0.03412076, -0.04565848, -0.02517151,  0.01087992,
         0.02263323, -0.04074991,  0.00373969, -0.04028618, -0.00808368,
         0.03318575,  0.03856674,  0.04419627,  0.04064437, -0.04143589],
       [-0.04101761, -0.00219955,  0.00667181,  0.03617293, -0.01490296,
         0.03982998, -0.0417721 ,  0.02096285, 