In [145]:
from tensorflow.keras.preprocessing.text import one_hot
# one_hot() is a utility function that converts words in your text into integer indices, i.e. numeric representations of words.

In [146]:
# Sample corpus: seven short sentences used by the rest of the notebook
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [147]:
# Display the list to confirm the sentences were stored as intended
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [148]:
# Vocabulary size: passed to one_hot as its range and to Embedding as its first argument
voc_size = 10_000


In [149]:
# Encode every sentence as a list of integer word indices within the vocabulary range
# NOTE(review): per the Keras docs the default hash used by one_hot is not stable
# across Python sessions - confirm whether hash_function='md5' is wanted so that the
# indices shown in the output are reproducible after a kernel restart.
one_hot_repr = [one_hot(sentence, voc_size) for sentence in sent]
one_hot_repr  # each sentence is now a list of indices drawn from the 10000-word vocabulary

[[7967, 8352, 4495, 2229],
 [7967, 8352, 4495, 3655],
 [7967, 6393, 4495, 9776],
 [7634, 2844, 6829, 3570, 9731],
 [7634, 2844, 6829, 3570, 1418],
 [8066, 7967, 5437, 4495, 2689],
 [2615, 5059, 3861, 3570]]

In [150]:
# We observe that the same word always maps to the same index (e.g. 'the' is 7967 and 'of' is 4495 in every sentence)
# Time stamp-->7.21


In [151]:
# Word Embedding Representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.models import Sequential



In [152]:
import numpy as np

In [153]:
# The number of words in each sentence is different.
# We need to make all the sentences the same length, otherwise we will not be able to
# train on them in our RNN

In [154]:
# Pad every encoded sentence to a common length so they can be batched together
sent_length = 8
embedding_docs = pad_sequences(
    one_hot_repr,
    maxlen=sent_length,
    padding='pre',  # 'pre' --> the zeros are added in front of the word indices
)

In [155]:
# Inspect the padded matrix: one row per sentence, zeros in front of the word indices
embedding_docs

array([[   0,    0,    0,    0, 7967, 8352, 4495, 2229],
       [   0,    0,    0,    0, 7967, 8352, 4495, 3655],
       [   0,    0,    0,    0, 7967, 6393, 4495, 9776],
       [   0,    0,    0, 7634, 2844, 6829, 3570, 9731],
       [   0,    0,    0, 7634, 2844, 6829, 3570, 1418],
       [   0,    0,    0, 8066, 7967, 5437, 4495, 2689],
       [   0,    0,    0,    0, 2615, 5059, 3861, 3570]], dtype=int32)

In [173]:
# feature representation
dim=10 # We want 10 features according to the dimensions
model=Sequential()
model.add(Embedding(voc_size,dim,input_shape=(sent_length,)))
model.compile('adam','mse')

In [171]:
# Sanity check of the vocabulary size handed to the Embedding layer
voc_size

10000

In [172]:
# Print the layer-by-layer summary of the Sequential model
model.summary()

In [159]:
# Run the embedding layer over all padded sentences. No fit() call appears in the
# cells shown here, so these look like the layer's initial (untrained) weights.
model.predict(embedding_docs)
# Every word gets represented by 10 dimensions (dim=10); the leading padding zeros
# all map to the same vector, as the repeated rows at the start of the output show.

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


array([[[-0.0327824 ,  0.00917498, -0.04189951,  0.04457494,
          0.00451563, -0.04021727,  0.01095518,  0.02193112,
         -0.0296897 , -0.02172191],
        [-0.0327824 ,  0.00917498, -0.04189951,  0.04457494,
          0.00451563, -0.04021727,  0.01095518,  0.02193112,
         -0.0296897 , -0.02172191],
        [-0.0327824 ,  0.00917498, -0.04189951,  0.04457494,
          0.00451563, -0.04021727,  0.01095518,  0.02193112,
         -0.0296897 , -0.02172191],
        [-0.0327824 ,  0.00917498, -0.04189951,  0.04457494,
          0.00451563, -0.04021727,  0.01095518,  0.02193112,
         -0.0296897 , -0.02172191],
        [-0.00606792,  0.00435256,  0.02134399, -0.00590109,
          0.03802241, -0.03689677,  0.0009459 , -0.0079873 ,
         -0.02662393,  0.03379666],
        [ 0.03305521,  0.02072152,  0.0077806 ,  0.01592061,
         -0.02155283,  0.0464747 , -0.03965688,  0.01844264,
         -0.0028893 , -0.02579367],
        [-0.02196165,  0.02707112, -0.04727286, -0.0

In [167]:
# Predict for the first sentence only: add a leading batch axis so the
# input has shape (1, sent_length) instead of (sent_length,)
single_input = embedding_docs[0][np.newaxis, :]
prediction = model.predict(single_input)
prediction

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


array([[[-0.0327824 ,  0.00917498, -0.04189951,  0.04457494,
          0.00451563, -0.04021727,  0.01095518,  0.02193112,
         -0.0296897 , -0.02172191],
        [-0.0327824 ,  0.00917498, -0.04189951,  0.04457494,
          0.00451563, -0.04021727,  0.01095518,  0.02193112,
         -0.0296897 , -0.02172191],
        [-0.0327824 ,  0.00917498, -0.04189951,  0.04457494,
          0.00451563, -0.04021727,  0.01095518,  0.02193112,
         -0.0296897 , -0.02172191],
        [-0.0327824 ,  0.00917498, -0.04189951,  0.04457494,
          0.00451563, -0.04021727,  0.01095518,  0.02193112,
         -0.0296897 , -0.02172191],
        [-0.00606792,  0.00435256,  0.02134399, -0.00590109,
          0.03802241, -0.03689677,  0.0009459 , -0.0079873 ,
         -0.02662393,  0.03379666],
        [ 0.03305521,  0.02072152,  0.0077806 ,  0.01592061,
         -0.02155283,  0.0464747 , -0.03965688,  0.01844264,
         -0.0028893 , -0.02579367],
        [-0.02196165,  0.02707112, -0.04727286, -0.0

In [169]:
# model.predict(embedding_docs[0])
# Here, embedding_docs[0] is a 1D array of shape (sent_length,) representing one sequence, but Keras expects batches of sequences with shape (batch_size, sent_length).
# So we add a leading batch dimension to make it 2-D before calling predict.