In [6]:
from tensorflow.keras.preprocessing.text import one_hot

In [7]:
sent = ['the glass of milk',
        'the glass of juice',
        'the cup of tea',
        'i am a good boy',
        'i am a good developer',
        'understand the meaning of words',
        'your videos are good']

In [8]:
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'i am a good boy',
 'i am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [None]:
## Define the vocabulary size

## limitting the number of unique words in vocabulary
voc_size = 10000

In [None]:
## One Hot Representation

## Hashes each word to an integer index between 1 and voc_size.
## Returns a list of integer indices for the words in the sentence.

one_hot_repr = [one_hot(words, voc_size) for words in sent]
one_hot_repr

[[6773, 6487, 8280, 3765],
 [6773, 6487, 8280, 6500],
 [6773, 8202, 8280, 4394],
 [4630, 4604, 3295, 6996, 9239],
 [4630, 4604, 3295, 6996, 7746],
 [6594, 6773, 495, 8280, 296],
 [1383, 6723, 4815, 6996]]

In [11]:
# Word Embedding Representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import pad_sequences

In [12]:
import numpy as np

In [13]:
sent_length = 8
embedded_docs = pad_sequences(one_hot_repr, padding = 'pre', maxlen = sent_length)
print(embedded_docs)

[[   0    0    0    0 6773 6487 8280 3765]
 [   0    0    0    0 6773 6487 8280 6500]
 [   0    0    0    0 6773 8202 8280 4394]
 [   0    0    0 4630 4604 3295 6996 9239]
 [   0    0    0 4630 4604 3295 6996 7746]
 [   0    0    0 6594 6773  495 8280  296]
 [   0    0    0    0 1383 6723 4815 6996]]


In [14]:
# feature representation
dim = 10

In [15]:
model = Sequential()
model.add(Embedding(voc_size, dim, input_length = sent_length))
model.compile('adam', 'mse')

In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [17]:
model.predict(embedded_docs)



array([[[ 4.93737310e-03, -4.00657579e-03, -3.04873586e-02,
          1.11623518e-02, -2.47253422e-02,  3.77607346e-03,
         -4.41287868e-02, -6.74712658e-03, -8.00870359e-04,
         -7.11967796e-03],
        [ 4.93737310e-03, -4.00657579e-03, -3.04873586e-02,
          1.11623518e-02, -2.47253422e-02,  3.77607346e-03,
         -4.41287868e-02, -6.74712658e-03, -8.00870359e-04,
         -7.11967796e-03],
        [ 4.93737310e-03, -4.00657579e-03, -3.04873586e-02,
          1.11623518e-02, -2.47253422e-02,  3.77607346e-03,
         -4.41287868e-02, -6.74712658e-03, -8.00870359e-04,
         -7.11967796e-03],
        [ 4.93737310e-03, -4.00657579e-03, -3.04873586e-02,
          1.11623518e-02, -2.47253422e-02,  3.77607346e-03,
         -4.41287868e-02, -6.74712658e-03, -8.00870359e-04,
         -7.11967796e-03],
        [-1.37701854e-02,  1.49779432e-02, -1.82290077e-02,
         -2.43199226e-02,  3.22342254e-02,  3.41801308e-02,
          1.27545111e-02,  2.66531371e-02, -4.297391

In [18]:
embedded_docs[0]

array([   0,    0,    0,    0, 6773, 6487, 8280, 3765])

In [19]:
model.predict(embedded_docs[0])



array([[ 4.93737310e-03, -4.00657579e-03, -3.04873586e-02,
         1.11623518e-02, -2.47253422e-02,  3.77607346e-03,
        -4.41287868e-02, -6.74712658e-03, -8.00870359e-04,
        -7.11967796e-03],
       [ 4.93737310e-03, -4.00657579e-03, -3.04873586e-02,
         1.11623518e-02, -2.47253422e-02,  3.77607346e-03,
        -4.41287868e-02, -6.74712658e-03, -8.00870359e-04,
        -7.11967796e-03],
       [ 4.93737310e-03, -4.00657579e-03, -3.04873586e-02,
         1.11623518e-02, -2.47253422e-02,  3.77607346e-03,
        -4.41287868e-02, -6.74712658e-03, -8.00870359e-04,
        -7.11967796e-03],
       [ 4.93737310e-03, -4.00657579e-03, -3.04873586e-02,
         1.11623518e-02, -2.47253422e-02,  3.77607346e-03,
        -4.41287868e-02, -6.74712658e-03, -8.00870359e-04,
        -7.11967796e-03],
       [-1.37701854e-02,  1.49779432e-02, -1.82290077e-02,
        -2.43199226e-02,  3.22342254e-02,  3.41801308e-02,
         1.27545111e-02,  2.66531371e-02, -4.29739133e-02,
         4.