In [1]:
from tensorflow.keras.layers import Input
from tensorflow.keras.preprocessing.text import hashing_trick, one_hot

In [2]:
# Toy corpus: five short English sentences used as input for the encoding,
# padding and embedding steps that follow.
sentences = [
    "The sun sets in the west.",
    "Birds chirp in the morning.",
    "Water is essential for life.",
    "Books help us gain knowledge.",
    "Flowers bloom in spring.",
]
# Bare last expression so the notebook echoes the list.
sentences

['The sun sets in the west.',
 'Birds chirp in the morning.',
 'Water is essential for life.',
 'Books help us gain knowledge.',
 'Flowers bloom in spring.']

In [3]:
## Define the vocabulary size
# Size of the index space that words are hashed into; the same value is
# later given to the Embedding layer as its number of rows.
voc_size = 10_000

In [4]:
## One hot representation
one_hot_repr=[one_hot(words,voc_size) for words in sentences]
one_hot_repr

[[2639, 3096, 4113, 7491, 2639, 1087],
 [8590, 7643, 7491, 2639, 4388],
 [7959, 514, 2567, 246, 5481],
 [4912, 8389, 3471, 5471, 9694],
 [1296, 7858, 7491, 7427]]

In [5]:
## word embedding representation

from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [6]:
import numpy as np

In [7]:
# Bring every encoded sentence to one common length so the batch forms a
# rectangular integer array. Zeros are inserted at the front ("pre"), so the
# real word indices stay right-aligned.
max_sent_length = 8

embedded_sent = pad_sequences(
    one_hot_repr,
    maxlen=max_sent_length,
    padding='pre',
)
print(embedded_sent)

[[   0    0 2639 3096 4113 7491 2639 1087]
 [   0    0    0 8590 7643 7491 2639 4388]
 [   0    0    0 7959  514 2567  246 5481]
 [   0    0    0 4912 8389 3471 5471 9694]
 [   0    0    0    0 1296 7858 7491 7427]]


In [8]:
## feature representation
# Number of components in each word's embedding vector; passed to the
# Embedding layer as its second argument when the model is built below.
dim = 10

In [13]:
# Single-layer model: maps every integer word index in a padded sentence to a
# trainable vector of `dim` components.
#
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument, which Keras 3 deprecates. Declaring the
# input up front also builds the model immediately, so summary() can report
# output shapes and parameter counts before any data is passed through it.
model = Sequential()
model.add(Input(shape=(max_sent_length,), dtype="int32"))
model.add(Embedding(voc_size, dim))
# No fit() call appears in this part of the notebook, so predict() returns
# vectors taken from the randomly initialised embedding matrix; the values
# differ between runs unless a random seed is set.
model.compile(optimizer='adam', loss='mse')

In [14]:
# Print the layer-by-layer overview of the model defined in the previous cell.
model.summary()

In [15]:
# Run the padded index sequences through the model; each word index is
# replaced by its embedding vector.
model.predict(embedded_sent)
## The output is a 3D array
# presumably shaped (number of sentences, max_sent_length, dim) = (5, 8, 10)
# — TODO confirm with `.shape`

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


array([[[-0.02262279,  0.01881728,  0.04145334, -0.00641381,
         -0.00388323,  0.0425696 ,  0.01534754, -0.03518145,
         -0.0496802 , -0.00281261],
        [-0.02262279,  0.01881728,  0.04145334, -0.00641381,
         -0.00388323,  0.0425696 ,  0.01534754, -0.03518145,
         -0.0496802 , -0.00281261],
        [ 0.03049464, -0.049602  ,  0.03377116,  0.00795424,
          0.03676493, -0.01923545,  0.02592366,  0.01521936,
          0.01001351, -0.04874031],
        [-0.02836337,  0.0184075 ,  0.02303829,  0.03614894,
          0.02217445,  0.03567104, -0.02799168, -0.01104988,
          0.01254168, -0.01043925],
        [ 0.02182573,  0.02558622,  0.00240737, -0.02127175,
         -0.03225654, -0.00973941, -0.00962235, -0.00508998,
         -0.02149426,  0.02307561],
        [ 0.02687408, -0.03154441, -0.0014332 ,  0.02291426,
          0.0277627 ,  0.00884179, -0.04485953, -0.00428619,
         -0.00442439, -0.04849715],
        [ 0.03049464, -0.049602  ,  0.03377116,  0.0

In [16]:
# Second summary() call, this time after predict() has been run on real data.
# NOTE(review): presumably repeated to compare against the earlier summary
# once the model has seen an input — confirm whether both calls are needed.
model.summary()

In [17]:
# Show the padded index sequence of the first sentence, for comparison with
# the first block of vectors returned by predict().
embedded_sent[0]

array([   0,    0, 2639, 3096, 4113, 7491, 2639, 1087], dtype=int32)