In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
# Toy corpus: a handful of short sentences used throughout the notebook
sentences = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [4]:
# Echo the list so its contents can be checked in the cell output
sentences

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [5]:
# Vocabulary size: passed to one_hot() and to the Embedding layer below
voc_size = 10_000

In [7]:
# Encode every sentence as a list of integer word indices below voc_size
onehot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sentences
]

In [8]:
# Each sentence is now a list of integer word indices (one per word);
# the same word gets the same index in every sentence.
print(onehot_repr)

[[537, 8348, 3211, 5339], [537, 8348, 3211, 684], [537, 9494, 3211, 8486], [1606, 3944, 5745, 9948, 3487], [1606, 3944, 5745, 9948, 1708], [7350, 537, 7263, 3211, 8655], [1713, 5262, 7279, 9948]]


In [9]:
# Word Embedding Representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences # all sentences should have same length of words
from tensorflow.keras.models import Sequential

In [10]:
import numpy as np

In [11]:
# Bring every index sequence to a common length by padding at the front
sent_len = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_len,
    padding='pre',
)

In [12]:
# Show the padded sequences: shorter sentences are filled with leading zeros
print(embedded_docs)

[[   0    0    0    0  537 8348 3211 5339]
 [   0    0    0    0  537 8348 3211  684]
 [   0    0    0    0  537 9494 3211 8486]
 [   0    0    0 1606 3944 5745 9948 3487]
 [   0    0    0 1606 3944 5745 9948 1708]
 [   0    0    0 7350  537 7263 3211 8655]
 [   0    0    0    0 1713 5262 7279 9948]]


In [13]:
# Intended embedding dimension (number of values in each word vector)
dim = 10

In [14]:
# Build a minimal model consisting only of an Embedding layer, which maps each
# integer word index (0 .. voc_size-1) to a dense vector of length `dim`.
model = Sequential()
# Use `dim` (set in the previous cell) instead of repeating the literal 10, so
# the embedding size is controlled from a single place.
# NOTE(review): recent Keras releases deprecate `input_length` -- confirm
# against the installed version before upgrading.
model.add(Embedding(voc_size, dim, input_length=sent_len))
# 'adam' is the optimizer and 'mse' is the loss; no metrics are configured.
model.compile(optimizer='adam', loss='mse')

In [15]:
# Print the layer-by-layer overview (output shapes and parameter counts)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Pass the padded index sequences through the Embedding layer: every index is
# replaced by its vector. No training step appears before this cell, so the
# values shown are presumably the layer's initial weights.
print(model.predict(embedded_docs))

[[[ 4.2048443e-02  2.0779859e-02  3.8299087e-02  4.2672705e-02
    9.9345222e-03  4.6817649e-02 -1.1816025e-03 -1.1854254e-02
   -1.9152641e-02  3.4175292e-03]
  [ 4.2048443e-02  2.0779859e-02  3.8299087e-02  4.2672705e-02
    9.9345222e-03  4.6817649e-02 -1.1816025e-03 -1.1854254e-02
   -1.9152641e-02  3.4175292e-03]
  [ 4.2048443e-02  2.0779859e-02  3.8299087e-02  4.2672705e-02
    9.9345222e-03  4.6817649e-02 -1.1816025e-03 -1.1854254e-02
   -1.9152641e-02  3.4175292e-03]
  [ 4.2048443e-02  2.0779859e-02  3.8299087e-02  4.2672705e-02
    9.9345222e-03  4.6817649e-02 -1.1816025e-03 -1.1854254e-02
   -1.9152641e-02  3.4175292e-03]
  [-2.4100317e-02 -4.2495977e-02 -5.5661052e-04  3.0213978e-02
    2.0863127e-02 -3.2237902e-02 -2.5257517e-02  8.8260323e-04
   -2.5901401e-02 -4.5230176e-02]
  [-2.1354949e-02  4.8809085e-02 -4.7034025e-04 -1.9021118e-02
    1.9750800e-02  8.7851062e-03  3.9820258e-02  4.3227259e-02
   -2.0075310e-02  1.3563719e-02]
  [ 7.6973811e-03  3.9581385e-02  7.8961

In [17]:
# Padded index sequence of the first sentence
embedded_docs[0]

array([   0,    0,    0,    0,  537, 8348, 3211, 5339], dtype=int32)

In [20]:
# Embedding output for the first sentence only. Its padded sequence starts with
# four zeros, so the first four rows printed are identical (the vector for index 0).
print(model.predict(embedded_docs)[0])

[[ 4.2048443e-02  2.0779859e-02  3.8299087e-02  4.2672705e-02
   9.9345222e-03  4.6817649e-02 -1.1816025e-03 -1.1854254e-02
  -1.9152641e-02  3.4175292e-03]
 [ 4.2048443e-02  2.0779859e-02  3.8299087e-02  4.2672705e-02
   9.9345222e-03  4.6817649e-02 -1.1816025e-03 -1.1854254e-02
  -1.9152641e-02  3.4175292e-03]
 [ 4.2048443e-02  2.0779859e-02  3.8299087e-02  4.2672705e-02
   9.9345222e-03  4.6817649e-02 -1.1816025e-03 -1.1854254e-02
  -1.9152641e-02  3.4175292e-03]
 [ 4.2048443e-02  2.0779859e-02  3.8299087e-02  4.2672705e-02
   9.9345222e-03  4.6817649e-02 -1.1816025e-03 -1.1854254e-02
  -1.9152641e-02  3.4175292e-03]
 [-2.4100317e-02 -4.2495977e-02 -5.5661052e-04  3.0213978e-02
   2.0863127e-02 -3.2237902e-02 -2.5257517e-02  8.8260323e-04
  -2.5901401e-02 -4.5230176e-02]
 [-2.1354949e-02  4.8809085e-02 -4.7034025e-04 -1.9021118e-02
   1.9750800e-02  8.7851062e-03  3.9820258e-02  4.3227259e-02
  -2.0075310e-02  1.3563719e-02]
 [ 7.6973811e-03  3.9581385e-02  7.8961253e-03  3.0955624e