In [2]:
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding

In [3]:
# Toy corpus: seven short sentences used to demonstrate word embeddings.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [4]:
# Echo the corpus so the raw sentences are visible before encoding.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [5]:
# Vocabulary size: upper bound on the integer indices that words are hashed
# into, and the number of rows in the embedding table.
voc_size = 10_000

In [6]:
# Encode every sentence as a list of integer word indices, one per word,
# each bounded by `voc_size`.
# NOTE(review): `one_hot` assigns indices by hashing, so unrelated words can
# collide and the exact numbers may differ between interpreter sessions --
# confirm against the Keras documentation before relying on specific values.
one_hot_repr = [one_hot(sentence, voc_size) for sentence in sent]


In [7]:
# Show the encoded sentences: one list of word indices per sentence.
one_hot_repr

[[9415, 8336, 6310, 2320],
 [9415, 8336, 6310, 208],
 [9415, 3572, 6310, 7506],
 [8290, 2429, 3868, 1461, 3428],
 [8290, 2429, 3868, 1461, 9354],
 [295, 9415, 3066, 6310, 5835],
 [8103, 9746, 9214, 1461]]

In [8]:
# Left-pad every encoded sentence with zeros so that all rows share the same
# length (`sent_len_max`), giving a rectangular batch for the model.
sent_len_max = 8
embedded_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_len_max,
    padding='pre',
)

# Header line followed by the padded matrix on the next line.
print("Padded Input:", embedded_docs, sep="\n")


Padded Input:
[[   0    0    0    0 9415 8336 6310 2320]
 [   0    0    0    0 9415 8336 6310  208]
 [   0    0    0    0 9415 3572 6310 7506]
 [   0    0    0 8290 2429 3868 1461 3428]
 [   0    0    0 8290 2429 3868 1461 9354]
 [   0    0    0  295 9415 3066 6310 5835]
 [   0    0    0    0 8103 9746 9214 1461]]


In [9]:
# Feature representation: length of the dense vector that each word index is
# mapped to (passed as `output_dim` to the Embedding layer below).

dim = 10 # embedding vector size, i.e. 10 features per word


In [10]:
# Define a model made of a single Embedding layer that maps each of the
# `voc_size` possible word indices to a dense vector of length `dim`.
# `input_length` is intentionally not passed: Keras 3 deprecates that argument
# and warns when it is supplied. The sequence length is provided through
# `model.build` below instead, so the summary still reports concrete shapes.
model = Sequential()
model.add(Embedding(input_dim=voc_size, output_dim=dim))
# Build with input shape (batch, sent_len_max); batch size is left open.
model.build(input_shape=(None, sent_len_max))
model.summary()




In [19]:
# Inspect the padded index sequence of the first sentence.
embedded_docs[0]

array([   0,    0,    0,    0, 9415, 8336, 6310, 2320])

In [24]:
# Run the padded index matrix through the embedding-only model: every word
# index is replaced by its `dim`-length vector. In the output below, the
# leading padding zeros of a sentence all map to the same vector.
# NOTE(review): the model is never trained in this notebook, so these values
# presumably come from the layer's initial weights and will vary per run.
model.predict(embedded_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step


array([[[-0.03247806, -0.04244839, -0.0086323 ,  0.02192713,
          0.00699679,  0.01921798, -0.04554874, -0.01866899,
         -0.0421389 , -0.00914978],
        [-0.03247806, -0.04244839, -0.0086323 ,  0.02192713,
          0.00699679,  0.01921798, -0.04554874, -0.01866899,
         -0.0421389 , -0.00914978],
        [-0.03247806, -0.04244839, -0.0086323 ,  0.02192713,
          0.00699679,  0.01921798, -0.04554874, -0.01866899,
         -0.0421389 , -0.00914978],
        [-0.03247806, -0.04244839, -0.0086323 ,  0.02192713,
          0.00699679,  0.01921798, -0.04554874, -0.01866899,
         -0.0421389 , -0.00914978],
        [-0.04871435, -0.01907499,  0.04186859, -0.02558087,
          0.04947158, -0.00741359, -0.04175021, -0.03989603,
         -0.02458916,  0.04440763],
        [ 0.02148903, -0.02812645,  0.02382683,  0.03531904,
          0.03870994,  0.02617638, -0.04880805,  0.03769511,
          0.02440566, -0.0151392 ],
        [ 0.03614211, -0.04756167, -0.03937535,  0.0