In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [22]:
# Toy corpus: eight short sentences that deliberately share words
# ("the", "of", "good", ...) so repeated words can be spotted in later cells.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "I am a good girl",
    "understand the meaning of words",
    "your videos are good",
]

In [23]:
# Echo the corpus so the raw sentences are visible next to their encoded forms below.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'I am a good girl',
 'understand the meaning of words',
 'your videos are good']

In [5]:
# Size of the integer index space that words are mapped into.
# The same value is later used as the Embedding layer's input_dim.
vocab_size = 10_000

In [24]:
# Encode each sentence as a list of integer word indices, one index per word.
# As the output shows, a repeated word (e.g. "the", "of") gets the same index
# wherever it appears.
# NOTE(review): per the Keras docs one_hot is hash-based, so distinct words may
# collide and indices may differ between Python sessions -- confirm if
# reproducibility matters.
one_hot_repr = [
    one_hot(sentence, vocab_size)
    for sentence in sent
]
one_hot_repr

[[2527, 5856, 6876, 8940],
 [2527, 5856, 6876, 7885],
 [2527, 4607, 6876, 475],
 [4037, 3001, 9002, 4055, 8240],
 [4037, 3001, 9002, 4055, 1055],
 [4037, 3001, 9002, 4055, 3627],
 [3322, 2527, 2902, 6876, 6926],
 [4691, 5028, 3334, 4055]]

In [7]:
# Word Embedding Representation
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import pad_sequences
from tensorflow.keras.layers import Embedding
import numpy as np

In [25]:
# The encoded sentences have different lengths (4 or 5 indices each).
# pad_sequences with padding='pre' prepends zeros so that every row ends up with
# exactly sent_length entries, producing a rectangular integer array.
sent_length = 8
embedded_doc = pad_sequences(
    one_hot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_doc)

[[   0    0    0    0 2527 5856 6876 8940]
 [   0    0    0    0 2527 5856 6876 7885]
 [   0    0    0    0 2527 4607 6876  475]
 [   0    0    0 4037 3001 9002 4055 8240]
 [   0    0    0 4037 3001 9002 4055 1055]
 [   0    0    0 4037 3001 9002 4055 3627]
 [   0    0    0 3322 2527 2902 6876 6926]
 [   0    0    0    0 4691 5028 3334 4055]]


In [10]:
# Feature representation: number of components in the vector produced for each
# word index (passed as output_dim to the Embedding layer below).
dim = 10

In [26]:
# A model consisting of a single Embedding layer (a lookup table of word vectors).
#   input_dim    -> number of rows in the table, i.e. the size of the index space
#                   (vocab_size), not the count of distinct words in `sent`.
#   output_dim   -> length of the vector stored for each index; these are the
#                   "features" / "coordinates" used to describe one word.
#   input_length -> number of indices per (padded) sentence.
# With vocab_size=10000 and dim=10 the table holds 100000 parameters, matching
# the summary printed in the next cell.
model = Sequential([
    Embedding(
        input_dim=vocab_size,
        output_dim=dim,
        input_length=sent_length,
    ),
])
# No fit() call appears in this part of the notebook, so the optimizer and loss
# chosen here are not exercised by the cells shown.
model.compile(loss='mse', optimizer='adam')

In [27]:
# Print the layer table: output shape per layer and the parameter count.
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 8, 10)             100000    
                                                                 
Total params: 100000 (390.62 KB)
Trainable params: 100000 (390.62 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [28]:
# Run every padded sentence through the Embedding layer: each integer index is
# replaced by its vector of length `dim`.
# No fit() call is visible in this section, so presumably these are the layer's
# initial (untrained) weights. The leading pad positions (index 0) all map to the
# same vector, which is why the first rows of the output repeat.
model.predict(embedded_doc)



array([[[ 0.02823022, -0.0413732 , -0.03176885, -0.02574197,
          0.00691724, -0.01648487,  0.04597205, -0.02490417,
         -0.0364845 , -0.01216457],
        [ 0.02823022, -0.0413732 , -0.03176885, -0.02574197,
          0.00691724, -0.01648487,  0.04597205, -0.02490417,
         -0.0364845 , -0.01216457],
        [ 0.02823022, -0.0413732 , -0.03176885, -0.02574197,
          0.00691724, -0.01648487,  0.04597205, -0.02490417,
         -0.0364845 , -0.01216457],
        [ 0.02823022, -0.0413732 , -0.03176885, -0.02574197,
          0.00691724, -0.01648487,  0.04597205, -0.02490417,
         -0.0364845 , -0.01216457],
        [ 0.03373089,  0.02335889,  0.04212923, -0.02982181,
          0.00197626, -0.00609506, -0.04516574, -0.01686954,
         -0.03207934, -0.02256121],
        [ 0.00057911,  0.03274951, -0.02692919, -0.04083487,
         -0.0220351 , -0.04737801, -0.00526967, -0.01295025,
          0.02139474,  0.02885795],
        [-0.02061629, -0.03451159, -0.00034813, -0.0

In [29]:
# Embedding vectors for the first sentence only.
# The model was declared with input shape (batch, sent_length). Indexing with
# embedded_doc[0] drops the batch axis and hands predict a 1-D array of shape
# (sent_length,), which Keras interprets as sent_length separate samples rather
# than one sentence; that mismatches the declared input shape.
# Slicing with [:1] keeps the batch axis, giving shape (1, sent_length); the
# trailing [0] then selects the single result, so the displayed array is still
# (sent_length, dim).
model.predict(embedded_doc[:1])[0]



array([[ 0.02823022, -0.0413732 , -0.03176885, -0.02574197,  0.00691724,
        -0.01648487,  0.04597205, -0.02490417, -0.0364845 , -0.01216457],
       [ 0.02823022, -0.0413732 , -0.03176885, -0.02574197,  0.00691724,
        -0.01648487,  0.04597205, -0.02490417, -0.0364845 , -0.01216457],
       [ 0.02823022, -0.0413732 , -0.03176885, -0.02574197,  0.00691724,
        -0.01648487,  0.04597205, -0.02490417, -0.0364845 , -0.01216457],
       [ 0.02823022, -0.0413732 , -0.03176885, -0.02574197,  0.00691724,
        -0.01648487,  0.04597205, -0.02490417, -0.0364845 , -0.01216457],
       [ 0.03373089,  0.02335889,  0.04212923, -0.02982181,  0.00197626,
        -0.00609506, -0.04516574, -0.01686954, -0.03207934, -0.02256121],
       [ 0.00057911,  0.03274951, -0.02692919, -0.04083487, -0.0220351 ,
        -0.04737801, -0.00526967, -0.01295025,  0.02139474,  0.02885795],
       [-0.02061629, -0.03451159, -0.00034813, -0.0160552 , -0.03109695,
         0.02504179,  0.02852449, -0.0150419 