## **Word Embedding Techniques using Embedding Layer in Keras**

In [7]:
import numpy as np
import tensorflow
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [2]:
print("Tensorflow version: ", tensorflow.__version__)

Tensorflow version:  2.8.2


In [3]:
# All sentences

sentences = ['the glass of milk',
     'the glass of juice',
     'the cup of tea',
    'I am a good boy',
     'I am a good developer',
     'understand the meaning of words',
     'your videos are good']

sentences

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

## **One Hot Representation**

In [6]:
# Vocabulary size
vocab_size = 100_00

one_hot_repre = [one_hot(input_text=words, n=vocab_size) for words in sentences]

one_hot_repre

[[6638, 7071, 4095, 1204],
 [6638, 7071, 4095, 7693],
 [6638, 3892, 4095, 5300],
 [724, 9883, 7354, 8101, 7677],
 [724, 9883, 7354, 8101, 3108],
 [3030, 6638, 1590, 4095, 7843],
 [3635, 451, 8711, 8101]]

In [8]:
# Let's fixed the sentence lenghth. So that, all the sentence become same size
sent_length = 8

embedded_docs = pad_sequences(sequences=one_hot_repre, maxlen=sent_length, padding='pre')

embedded_docs

array([[   0,    0,    0,    0, 6638, 7071, 4095, 1204],
       [   0,    0,    0,    0, 6638, 7071, 4095, 7693],
       [   0,    0,    0,    0, 6638, 3892, 4095, 5300],
       [   0,    0,    0,  724, 9883, 7354, 8101, 7677],
       [   0,    0,    0,  724, 9883, 7354, 8101, 3108],
       [   0,    0,    0, 3030, 6638, 1590, 4095, 7843],
       [   0,    0,    0,    0, 3635,  451, 8711, 8101]], dtype=int32)

In [10]:
# Let's take our dimension. We can choose dimension.
num_of_dim = 10

model = Sequential()

model.add(layer=Embedding(input_dim=vocab_size, output_dim=num_of_dim, input_length=sent_length))

model.compile(optimizer='adam', loss='mse')

In [11]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             100000    
                                                                 
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
print(model.predict(x=embedded_docs))

[[[-2.3428679e-02 -4.4658769e-02  4.7955737e-03  8.8461637e-03
    1.1811078e-02  3.9300095e-02  1.4992777e-02 -1.7618012e-02
    2.4685860e-03  1.6314451e-02]
  [-2.3428679e-02 -4.4658769e-02  4.7955737e-03  8.8461637e-03
    1.1811078e-02  3.9300095e-02  1.4992777e-02 -1.7618012e-02
    2.4685860e-03  1.6314451e-02]
  [-2.3428679e-02 -4.4658769e-02  4.7955737e-03  8.8461637e-03
    1.1811078e-02  3.9300095e-02  1.4992777e-02 -1.7618012e-02
    2.4685860e-03  1.6314451e-02]
  [-2.3428679e-02 -4.4658769e-02  4.7955737e-03  8.8461637e-03
    1.1811078e-02  3.9300095e-02  1.4992777e-02 -1.7618012e-02
    2.4685860e-03  1.6314451e-02]
  [ 3.8196240e-02 -3.7441410e-02  1.2390982e-02 -3.9371919e-02
   -2.9834498e-02 -3.4310199e-02  6.4099915e-03 -1.4865398e-02
    2.5405768e-02  2.8853450e-02]
  [ 2.0536866e-02  2.1996226e-02 -2.3300469e-02 -2.1834349e-02
   -4.3215431e-02  2.5783036e-02 -3.0801881e-02  2.6892986e-02
    2.6687238e-02  1.8847015e-02]
  [ 1.8817712e-02 -1.4448047e-02 -1.3601

In [13]:
embedded_docs[0]

array([   0,    0,    0,    0, 6638, 7071, 4095, 1204], dtype=int32)

In [14]:
print(model.predict(x=embedded_docs)[0])

[[-0.02342868 -0.04465877  0.00479557  0.00884616  0.01181108  0.0393001
   0.01499278 -0.01761801  0.00246859  0.01631445]
 [-0.02342868 -0.04465877  0.00479557  0.00884616  0.01181108  0.0393001
   0.01499278 -0.01761801  0.00246859  0.01631445]
 [-0.02342868 -0.04465877  0.00479557  0.00884616  0.01181108  0.0393001
   0.01499278 -0.01761801  0.00246859  0.01631445]
 [-0.02342868 -0.04465877  0.00479557  0.00884616  0.01181108  0.0393001
   0.01499278 -0.01761801  0.00246859  0.01631445]
 [ 0.03819624 -0.03744141  0.01239098 -0.03937192 -0.0298345  -0.0343102
   0.00640999 -0.0148654   0.02540577  0.02885345]
 [ 0.02053687  0.02199623 -0.02330047 -0.02183435 -0.04321543  0.02578304
  -0.03080188  0.02689299  0.02668724  0.01884701]
 [ 0.01881771 -0.01444805 -0.0136018  -0.03722728  0.01036897  0.00931306
   0.04988774 -0.02167592  0.00388153  0.00786781]
 [ 0.03661731  0.01564061 -0.00233229 -0.01629362  0.0405761  -0.017888
  -0.02762266 -0.0210503   0.01221222  0.04975149]]
