In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
# Toy corpus: seven short sentences used to demonstrate word embeddings.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [4]:
# Echo the corpus to confirm it was defined as intended.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

## Vocabulary Size

In [6]:
# Vocabulary size: passed to one_hot as the size of the index space and to
# the Embedding layer as its input dimension.
voc_size = 10000

## One-Hot Representation (integer word indices)

In [8]:
# Encode every sentence as a list of integer word indices, using voc_size as
# the size of the index space.
onehot_repr = [one_hot(sentence, voc_size) for sentence in sent]
print(onehot_repr)

[[7678, 8630, 119, 2332], [7678, 8630, 119, 652], [7678, 2757, 119, 4759], [6028, 8441, 5747, 3570, 6966], [6028, 8441, 5747, 3570, 7851], [1766, 7678, 4707, 119, 7236], [49, 1640, 7825, 3570]]


## Word Embedding Representation

In [10]:
from tensorflow.keras.layers import Embedding
# pad_sequences pads the index lists so that every sentence has the same length.
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [11]:
import numpy as np

In [12]:
# Fixed sentence length: shorter index lists are left-padded with zeros so
# that every row of embedded_docs has exactly sent_length entries.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 7678 8630  119 2332]
 [   0    0    0    0 7678 8630  119  652]
 [   0    0    0    0 7678 2757  119 4759]
 [   0    0    0 6028 8441 5747 3570 6966]
 [   0    0    0 6028 8441 5747 3570 7851]
 [   0    0    0 1766 7678 4707  119 7236]
 [   0    0    0    0   49 1640 7825 3570]]


In [13]:
# NOTE(review): `dim` is not referenced by any visible cell; the Embedding
# layer below hard-codes an output size of 10 instead. Confirm which value
# is intended.
dim = 15

In [14]:
# Single-layer model: an Embedding that maps each of the sent_length word
# indices (taken from a voc_size-sized index space) to a 10-dimensional vector.
model = Sequential([
    Embedding(input_dim=voc_size, output_dim=10, input_length=sent_length),
])
model.compile(optimizer='adam', loss='mse')

In [15]:
# Show the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Run the padded sentences through the embedding layer and print the vector
# produced for every position. No training step is visible before this call,
# so these are presumably the layer's initial weights.
print(model.predict(embedded_docs))

[[[-0.04557706  0.04267665 -0.04178191 -0.04439842 -0.03121405
   -0.0077999  -0.01416079  0.02743217 -0.03505149  0.03882077]
  [-0.04557706  0.04267665 -0.04178191 -0.04439842 -0.03121405
   -0.0077999  -0.01416079  0.02743217 -0.03505149  0.03882077]
  [-0.04557706  0.04267665 -0.04178191 -0.04439842 -0.03121405
   -0.0077999  -0.01416079  0.02743217 -0.03505149  0.03882077]
  [-0.04557706  0.04267665 -0.04178191 -0.04439842 -0.03121405
   -0.0077999  -0.01416079  0.02743217 -0.03505149  0.03882077]
  [-0.04425554  0.01182764 -0.00620867 -0.0286022   0.02208455
    0.04938401  0.02961474  0.01767225  0.01760724 -0.0071633 ]
  [ 0.00911114  0.0363534   0.04708281  0.01675626  0.00149338
   -0.0068845  -0.01859749  0.03492199  0.00493637  0.02131179]
  [ 0.03395362 -0.02983307  0.01222284 -0.04950794  0.01747708
    0.00250464  0.0180813  -0.04387819  0.00052825 -0.03237983]
  [ 0.04578703  0.01571432  0.01724466  0.03161139  0.0056375
   -0.03552737 -0.04032289 -0.03675763  0.0097378

In [17]:
# Padded index vector of the first sentence.
embedded_docs[0]

array([   0,    0,    0,    0, 7678, 8630,  119, 2332])

In [18]:
# Padded index vector of the second sentence.
embedded_docs[1]

array([   0,    0,    0,    0, 7678, 8630,  119,  652])