## Generate embedding layer for a given vocabulary

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [4]:
# Toy corpus: seven short sentences used as input for the encoding steps.
sent = [
    # beverage phrases sharing the "the ... of ..." pattern
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    # first-person statements
    "I am a good boy",
    "I am a good developer",
    # miscellaneous
    "understand the meaning of words",
    "your videos are good",
]

In [24]:
# Size of the vocabulary: passed to one_hot() as the index range and to the
# Embedding layer as its input dimension.
voc_size = 1_000

### One-hot representation

In [25]:
# Encode every sentence as a list of integer word indices via one_hot(),
# using voc_size as the size of the index space.
onehot_repr = []
for sentence in sent:
    onehot_repr.append(one_hot(sentence, voc_size))
print(onehot_repr)

[[54, 631, 171, 534], [54, 631, 171, 379], [54, 400, 171, 727], [897, 739, 235, 503, 566], [897, 739, 235, 503, 463], [117, 54, 18, 171, 631], [168, 162, 155, 503]]


### Word embedding representation

In [26]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [27]:
# Fixed sequence length for every encoded sentence.
sent_length = 8

# Left-pad ('pre') each index list with zeros up to sent_length so that all
# rows have the same length.
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[  0   0   0   0  54 631 171 534]
 [  0   0   0   0  54 631 171 379]
 [  0   0   0   0  54 400 171 727]
 [  0   0   0 897 739 235 503 566]
 [  0   0   0 897 739 235 503 463]
 [  0   0   0 117  54  18 171 631]
 [  0   0   0   0 168 162 155 503]]


In [29]:
# Build a model consisting of a single Embedding layer.
#
# The layer maps each integer word index (one of voc_size possible values) to
# a vector of `dim` floats, for sequences of sent_length indices. With
# voc_size=1000 and dim=10 the layer holds voc_size * dim = 10,000 weights.
dim = 10  # size of each word vector; change here to resize the embedding

model = Sequential()
# Use `dim` rather than a repeated literal so the constant above actually
# controls the embedding width.
model.add(Embedding(input_dim=voc_size, output_dim=dim, input_length=sent_length))
# Optimizer and loss are set here, but no training step is visible in this
# notebook section.
model.compile('adam', 'mse')
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 8, 10)             10000     
Total params: 10,000
Trainable params: 10,000
Non-trainable params: 0
_________________________________________________________________


In [30]:
# Run the padded index sequences through the model and show the resulting
# embedding vectors for every sentence.
all_embeddings = model.predict(embedded_docs)
print(all_embeddings)

[[[-3.86950597e-02 -2.30327006e-02  5.37799671e-03  3.97347696e-02
    3.74224037e-03  2.09924318e-02 -2.92789470e-02 -2.88344976e-02
   -2.53996495e-02  2.86414288e-02]
  [-3.86950597e-02 -2.30327006e-02  5.37799671e-03  3.97347696e-02
    3.74224037e-03  2.09924318e-02 -2.92789470e-02 -2.88344976e-02
   -2.53996495e-02  2.86414288e-02]
  [-3.86950597e-02 -2.30327006e-02  5.37799671e-03  3.97347696e-02
    3.74224037e-03  2.09924318e-02 -2.92789470e-02 -2.88344976e-02
   -2.53996495e-02  2.86414288e-02]
  [-3.86950597e-02 -2.30327006e-02  5.37799671e-03  3.97347696e-02
    3.74224037e-03  2.09924318e-02 -2.92789470e-02 -2.88344976e-02
   -2.53996495e-02  2.86414288e-02]
  [-1.93397533e-02 -5.60561568e-03  2.45020874e-02 -4.99960296e-02
    2.76194923e-02 -4.99268286e-02  3.79022025e-02 -4.54270244e-02
   -1.25173703e-02  2.41284706e-02]
  [-1.39081962e-02  2.99813412e-02 -3.99311297e-02  1.00292265e-04
    4.31233384e-02  1.77567936e-02 -1.08753517e-03  4.54350822e-02
   -4.61851433e-

In [31]:
# Padded index sequence of the first sentence.
first_doc = embedded_docs[0]
print(first_doc)

[  0   0   0   0  54 631 171 534]


In [32]:
# Embedding vectors for the first sentence: one row per position in the
# padded sequence (leading rows correspond to the padding index 0).
first_doc_embedding = model.predict(embedded_docs)[0]
print(first_doc_embedding)

[[-0.03869506 -0.0230327   0.005378    0.03973477  0.00374224  0.02099243
  -0.02927895 -0.0288345  -0.02539965  0.02864143]
 [-0.03869506 -0.0230327   0.005378    0.03973477  0.00374224  0.02099243
  -0.02927895 -0.0288345  -0.02539965  0.02864143]
 [-0.03869506 -0.0230327   0.005378    0.03973477  0.00374224  0.02099243
  -0.02927895 -0.0288345  -0.02539965  0.02864143]
 [-0.03869506 -0.0230327   0.005378    0.03973477  0.00374224  0.02099243
  -0.02927895 -0.0288345  -0.02539965  0.02864143]
 [-0.01933975 -0.00560562  0.02450209 -0.04999603  0.02761949 -0.04992683
   0.0379022  -0.04542702 -0.01251737  0.02412847]
 [-0.0139082   0.02998134 -0.03993113  0.00010029  0.04312334  0.01775679
  -0.00108754  0.04543508 -0.00461851 -0.02209867]
 [ 0.03487969 -0.04607738 -0.00248673 -0.004442   -0.00108426  0.02390363
  -0.02785038  0.02774427  0.01370284 -0.00415836]
 [-0.02214017  0.0309616  -0.04019886 -0.03834438  0.03436638  0.01787687
  -0.01510024 -0.02272683  0.04705639  0.01544959]]