### Word Embedding Techniques using Embedding Layer in Keras

In [1]:
import tensorflow as tf
# Record the installed TensorFlow version so the run can be reproduced.
print(tf.__version__)

2.10.0


In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
### Sentences
# Toy corpus: seven short sentences that the cells below encode and embed.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [4]:
# Display the sentence list to verify the corpus contents.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [8]:
### Vocabulary Size
# Number of word indices available; passed to one_hot() and to the
# Embedding layer in the cells below.
voc_size = 500

## One-Hot Representation (integer word indices)

In [9]:
# Encode every sentence as a list of integer word indices, one index per word,
# using a vocabulary of size voc_size.
onehot_repr = list(map(lambda text: one_hot(text, voc_size), sent))
print(onehot_repr)

[[77, 48, 76, 136], [77, 48, 76, 289], [77, 125, 76, 300], [448, 429, 490, 5, 208], [448, 429, 490, 5, 87], [297, 77, 198, 76, 481], [377, 420, 463, 5]]


## Word Embedding Representation

In [10]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [11]:
import numpy as np

In [13]:
## Pre Padding

# Every encoded sentence is brought to the same length by inserting zeros
# in front of it ('pre'), giving a rectangular array of sent_length columns.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[  0   0   0   0  77  48  76 136]
 [  0   0   0   0  77  48  76 289]
 [  0   0   0   0  77 125  76 300]
 [  0   0   0 448 429 490   5 208]
 [  0   0   0 448 429 490   5  87]
 [  0   0   0 297  77 198  76 481]
 [  0   0   0   0 377 420 463   5]]


## Training

In [14]:
## 10 feature dimensions
# Number of features in each word's embedding vector.
dim = 10

In [15]:
# Single-layer model: an Embedding lookup that maps each of the voc_size word
# indices to a dense vector of length `dim`, for inputs of sent_length tokens.
model = Sequential()
# Use the `dim` constant defined above rather than a hard-coded literal, so
# changing `dim` actually changes the embedding size.
model.add(Embedding(voc_size, dim, input_length=sent_length))
# Compiled with the Adam optimizer and mean-squared-error loss.
model.compile('adam', 'mse')

In [16]:
# Print the layer table. The Embedding layer holds one 10-dim vector per
# vocabulary index: 500 * 10 = 5000 trainable parameters.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             5000      
                                                                 
Total params: 5,000
Trainable params: 5,000
Non-trainable params: 0
_________________________________________________________________


In [17]:
## 'the glass of milk'
# Padded index sequence of the first sentence: leading zeros are padding,
# the trailing entries are the word indices.
embedded_docs[0]

array([  0,   0,   0,   0,  77,  48,  76, 136])

In [18]:
# Look up the embeddings of the first sentence only.
# Slicing with [:1] keeps the batch axis, so the model receives one sample of
# shape (1, sent_length). A bare 1-D row would instead be read as sent_length
# separate one-token samples, which does not match the model's declared input.
# The trailing [0] drops the batch axis again, leaving one vector per token.
model.predict(embedded_docs[:1])[0]



array([[ 1.0963369e-02, -1.8766038e-03, -1.7619431e-02, -1.0783244e-02,
        -1.2704134e-03, -1.3498217e-04,  1.3911400e-02, -4.0959395e-02,
         3.5117242e-02, -4.2429067e-02],
       [ 1.0963369e-02, -1.8766038e-03, -1.7619431e-02, -1.0783244e-02,
        -1.2704134e-03, -1.3498217e-04,  1.3911400e-02, -4.0959395e-02,
         3.5117242e-02, -4.2429067e-02],
       [ 1.0963369e-02, -1.8766038e-03, -1.7619431e-02, -1.0783244e-02,
        -1.2704134e-03, -1.3498217e-04,  1.3911400e-02, -4.0959395e-02,
         3.5117242e-02, -4.2429067e-02],
       [ 1.0963369e-02, -1.8766038e-03, -1.7619431e-02, -1.0783244e-02,
        -1.2704134e-03, -1.3498217e-04,  1.3911400e-02, -4.0959395e-02,
         3.5117242e-02, -4.2429067e-02],
       [-2.2751821e-02,  1.4934648e-02,  2.7009819e-02, -4.0415846e-02,
         2.7857188e-02,  3.2000128e-02,  4.3633208e-03, -7.7165663e-05,
         1.1780895e-02,  4.3432724e-02],
       [ 4.9029555e-02,  4.8177790e-02,  1.0014962e-02, -1.5703619e-02,
   

In [19]:
# Look up embeddings for all padded sentences at once; per the model summary
# each sentence yields an (8, 10) matrix, one 10-dim vector per token.
# NOTE(review): no fit() call appears in the cells above, so these look like
# the layer's initial (untrained) weights. Confirm if training is expected.
model.predict(embedded_docs)



array([[[ 1.09633692e-02, -1.87660381e-03, -1.76194310e-02,
         -1.07832439e-02, -1.27041340e-03, -1.34982169e-04,
          1.39114000e-02, -4.09593955e-02,  3.51172425e-02,
         -4.24290672e-02],
        [ 1.09633692e-02, -1.87660381e-03, -1.76194310e-02,
         -1.07832439e-02, -1.27041340e-03, -1.34982169e-04,
          1.39114000e-02, -4.09593955e-02,  3.51172425e-02,
         -4.24290672e-02],
        [ 1.09633692e-02, -1.87660381e-03, -1.76194310e-02,
         -1.07832439e-02, -1.27041340e-03, -1.34982169e-04,
          1.39114000e-02, -4.09593955e-02,  3.51172425e-02,
         -4.24290672e-02],
        [ 1.09633692e-02, -1.87660381e-03, -1.76194310e-02,
         -1.07832439e-02, -1.27041340e-03, -1.34982169e-04,
          1.39114000e-02, -4.09593955e-02,  3.51172425e-02,
         -4.24290672e-02],
        [-2.27518212e-02,  1.49346478e-02,  2.70098187e-02,
         -4.04158458e-02,  2.78571881e-02,  3.20001282e-02,
          4.36332077e-03, -7.71656632e-05,  1.178089

In [20]:
# Embed the whole batch, then print only the first sentence's matrix
# (one row of embedding values per token position).
print(model.predict(embedded_docs)[0])

[[ 1.0963369e-02 -1.8766038e-03 -1.7619431e-02 -1.0783244e-02
  -1.2704134e-03 -1.3498217e-04  1.3911400e-02 -4.0959395e-02
   3.5117242e-02 -4.2429067e-02]
 [ 1.0963369e-02 -1.8766038e-03 -1.7619431e-02 -1.0783244e-02
  -1.2704134e-03 -1.3498217e-04  1.3911400e-02 -4.0959395e-02
   3.5117242e-02 -4.2429067e-02]
 [ 1.0963369e-02 -1.8766038e-03 -1.7619431e-02 -1.0783244e-02
  -1.2704134e-03 -1.3498217e-04  1.3911400e-02 -4.0959395e-02
   3.5117242e-02 -4.2429067e-02]
 [ 1.0963369e-02 -1.8766038e-03 -1.7619431e-02 -1.0783244e-02
  -1.2704134e-03 -1.3498217e-04  1.3911400e-02 -4.0959395e-02
   3.5117242e-02 -4.2429067e-02]
 [-2.2751821e-02  1.4934648e-02  2.7009819e-02 -4.0415846e-02
   2.7857188e-02  3.2000128e-02  4.3633208e-03 -7.7165663e-05
   1.1780895e-02  4.3432724e-02]
 [ 4.9029555e-02  4.8177790e-02  1.0014962e-02 -1.5703619e-02
   2.4668422e-02  3.8605817e-03  2.7186599e-02 -7.3823556e-03
  -3.4520805e-02  2.7472232e-02]
 [-1.6310312e-02 -4.2187907e-02 -4.7854077e-02  3.1025972e

In [21]:
### Assignment

# New corpus for the exercise. This rebinds `sent`, replacing the sentence
# list defined near the top of the notebook.
sent = [
    "The world is a better place",
    "Marvel series is my favourite movie",
    "I like DC movies",
    "the cat is eating the food",
    "Tom and Jerry is my favourite movie",
    "Python is my favourite programming language",
]