In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: eight short sentences. Several repeat a word
# (e.g. "coffee", "falls", "runs") so that repeated tokens are easy to
# spot in the encoded output further down.
sentences = [
    "Sun is bright.",
    "Coffee is hot coffee.",
    "Dog runs very fast.",
    "Rain falls and falls.",
    "Music is loud music.",
    "Stars shine at night.",
    "He runs and runs.",
    "She smiles and smiles.",
]


In [3]:
# Echo the corpus so its contents are visible in the notebook output.
sentences

['Sun is bright.',
 'Coffee is hot coffee.',
 'Dog runs very fast.',
 'Rain falls and falls.',
 'Music is loud music.',
 'Stars shine at night.',
 'He runs and runs.',
 'She smiles and smiles.']

In [4]:
# Vocabulary size: the upper bound handed to `one_hot` when encoding words
# and the number of rows given to the Embedding layer.
vocab_size = 10_000

In [5]:
# Encode each sentence as a list of integer word indices.
# Despite its name, `one_hot` yields one integer per word (see the output
# below), not a one-hot vector; a repeated word gets the same index.
# NOTE(review): by default `one_hot` appears to rely on Python's built-in
# `hash`, so the exact indices may differ between interpreter sessions --
# confirm against the Keras docs if reproducible indices matter.
one_hot_repr = [one_hot(sentence, vocab_size) for sentence in sentences]
one_hot_repr

[[6977, 5573, 944],
 [8595, 5573, 4794, 8595],
 [6212, 2039, 1230, 3508],
 [9798, 2502, 8909, 2502],
 [5841, 5573, 1473, 5841],
 [4546, 8939, 3979, 8283],
 [2252, 2039, 8909, 2039],
 [7868, 2925, 8909, 2925]]

In [6]:
### word embedding representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
import numpy as np

In [10]:
# Bring every encoded sentence to the same length by padding with zeros on
# the left ('pre'), so the batch forms a rectangular integer array.
sent_length = 6
embedded_docs = pad_sequences(one_hot_repr, maxlen=sent_length, padding='pre')
print(embedded_docs)

[[   0    0    0 6977 5573  944]
 [   0    0 8595 5573 4794 8595]
 [   0    0 6212 2039 1230 3508]
 [   0    0 9798 2502 8909 2502]
 [   0    0 5841 5573 1473 5841]
 [   0    0 4546 8939 3979 8283]
 [   0    0 2252 2039 8909 2039]
 [   0    0 7868 2925 8909 2925]]


In [11]:
# Embedding dimension: the length of the dense vector that represents each
# word index (used as the Embedding layer's output size).
dim = 10

In [13]:
# Single-layer model: an Embedding lookup table with `vocab_size` rows and
# `dim` columns that maps each integer word index to a dense vector.
# `input_length` is intentionally not passed: Keras 3 deprecates that
# argument (it only triggers a warning). The sequence length is supplied
# when the model is built with an explicit input shape.
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=dim))
# Compiled with a placeholder optimizer/loss; no training call appears in
# the cells shown, so predictions reflect the initial embedding weights.
model.compile(optimizer='adam', loss='mse')

In [15]:
# Build with an explicit (batch, sequence) input shape so the Embedding
# weights exist before the model is summarised or used for prediction.
model.build(input_shape=(None, sent_length))

In [16]:
# Print the layer table (output shape and parameter count) for the model.
model.summary()

In [17]:
# Look up the embedding vector for every token of every padded sentence.
# Each sentence yields sent_length vectors of length dim; positions holding
# the padding index 0 all map to the same vector, which is why the leading
# rows of each sentence in the output are identical.
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step


array([[[ 0.03069583,  0.03247822,  0.00212836, -0.04853854,
         -0.03476848,  0.00866593, -0.00979584, -0.025225  ,
          0.03414248, -0.00230453],
        [ 0.03069583,  0.03247822,  0.00212836, -0.04853854,
         -0.03476848,  0.00866593, -0.00979584, -0.025225  ,
          0.03414248, -0.00230453],
        [ 0.03069583,  0.03247822,  0.00212836, -0.04853854,
         -0.03476848,  0.00866593, -0.00979584, -0.025225  ,
          0.03414248, -0.00230453],
        [-0.04646481, -0.03793439, -0.03002741, -0.01806056,
          0.01422871, -0.03004833,  0.02099613, -0.02569382,
         -0.04084489,  0.04586797],
        [-0.0069498 , -0.01691566, -0.03503663,  0.02285895,
         -0.04970759, -0.02673147, -0.01429   ,  0.01898748,
          0.03769578, -0.02544506],
        [ 0.04368978, -0.02555447, -0.00582609, -0.0080317 ,
         -0.0305723 ,  0.03215773, -0.00916536,  0.00247393,
          0.03527625, -0.01525604]],

       [[ 0.03069583,  0.03247822,  0.00212836, -0

In [20]:
# Predict for the first sentence only. Slicing with [:1] keeps the leading
# batch axis (shape: 1 x sent_length), which the model expects.
first_doc_batch = embedded_docs[:1]
model.predict(first_doc_batch)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step


array([[[ 0.03069583,  0.03247822,  0.00212836, -0.04853854,
         -0.03476848,  0.00866593, -0.00979584, -0.025225  ,
          0.03414248, -0.00230453],
        [ 0.03069583,  0.03247822,  0.00212836, -0.04853854,
         -0.03476848,  0.00866593, -0.00979584, -0.025225  ,
          0.03414248, -0.00230453],
        [ 0.03069583,  0.03247822,  0.00212836, -0.04853854,
         -0.03476848,  0.00866593, -0.00979584, -0.025225  ,
          0.03414248, -0.00230453],
        [-0.04646481, -0.03793439, -0.03002741, -0.01806056,
          0.01422871, -0.03004833,  0.02099613, -0.02569382,
         -0.04084489,  0.04586797],
        [-0.0069498 , -0.01691566, -0.03503663,  0.02285895,
         -0.04970759, -0.02673147, -0.01429   ,  0.01898748,
          0.03769578, -0.02544506],
        [ 0.04368978, -0.02555447, -0.00582609, -0.0080317 ,
         -0.0305723 ,  0.03215773, -0.00916536,  0.00247393,
          0.03527625, -0.01525604]]], dtype=float32)