# Word embedding techniques using Embedding layer in Keras

In [1]:
from tensorflow.keras.preprocessing.text import one_hot

In [2]:
# Toy corpus: seven short sentences that the later cells encode and embed.
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [3]:
# Vocabulary size: handed to one_hot when encoding the sentences and used as
# the first argument of the Embedding layer further down.
voc_size = 10_000

One Hot Representation

In [4]:
# Encode every sentence as a list of integer word indices (one index per word).
# NOTE(review): per the Keras docs one_hot is hash-based, so two different
# words may share an index — confirm this is acceptable for the use case.
onehot_rep = list(map(lambda sentence: one_hot(sentence, voc_size), sent))
print(onehot_rep)

[[7220, 7581, 9970, 6253], [7220, 7581, 9970, 9149], [7220, 5266, 9970, 2656], [6184, 6553, 9992, 526, 3655], [6184, 6553, 9992, 526, 5964], [5545, 7220, 4222, 9970, 6852], [3979, 3958, 4082, 526]]


**Word Embedding Representation**

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [8]:
# Common length that every encoded sentence is padded to.
sent_len = 8
# padding='pre' adds the zeros at the front of each sequence.
embedded_docs = pad_sequences(
    onehot_rep,
    maxlen=sent_len,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 7220 7581 9970 6253]
 [   0    0    0    0 7220 7581 9970 9149]
 [   0    0    0    0 7220 5266 9970 2656]
 [   0    0    0 6184 6553 9992  526 3655]
 [   0    0    0 6184 6553 9992  526 5964]
 [   0    0    0 5545 7220 4222 9970 6852]
 [   0    0    0    0 3979 3958 4082  526]]


In [9]:
# Embedding feature dimension.
# NOTE(review): the model cell below passes the literal 10 to Embedding rather
# than this variable, so `dim` is unused in the visible cells — confirm which
# value is intended.
dim = 15

In [11]:
# Single-layer model: an Embedding lookup over a voc_size-entry vocabulary that
# turns each of the sent_len word indices into a 10-dimensional vector.
# NOTE(review): the output width is the literal 10, not the `dim` variable
# defined in an earlier cell — confirm which value is intended.
model = Sequential([
    Embedding(voc_size, 10, input_length=sent_len),
])
# Compiled only so the model can be used; no fit call appears in the visible cells.
model.compile(optimizer='adam', loss='mse')

In [12]:
# Print the layer table; the embedding matrix is voc_size x 10 = 100,000 parameters.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Look up the embedding vector of every word index in every padded sentence.
# No training happens in the visible cells, so these are the layer's initial weights.
doc_vectors = model.predict(embedded_docs)
print(doc_vectors)

[[[ 0.03147743  0.02656661 -0.03907777  0.00538928 -0.01277488
    0.0457696  -0.04285074  0.02408946 -0.03543677  0.02478757]
  [ 0.03147743  0.02656661 -0.03907777  0.00538928 -0.01277488
    0.0457696  -0.04285074  0.02408946 -0.03543677  0.02478757]
  [ 0.03147743  0.02656661 -0.03907777  0.00538928 -0.01277488
    0.0457696  -0.04285074  0.02408946 -0.03543677  0.02478757]
  [ 0.03147743  0.02656661 -0.03907777  0.00538928 -0.01277488
    0.0457696  -0.04285074  0.02408946 -0.03543677  0.02478757]
  [ 0.0318025  -0.03486433  0.04381151 -0.02800925 -0.02216991
   -0.00454361  0.00937456  0.00530289 -0.00389381  0.00465148]
  [ 0.00181343  0.03320623  0.02271677 -0.00243205 -0.0275746
    0.02081    -0.04397174  0.00361282  0.03117498  0.04876057]
  [-0.00346177 -0.03132806  0.02340728  0.04147964 -0.04607162
   -0.04735309  0.02422732 -0.03555065  0.01558931  0.03751099]
  [ 0.04034307  0.01720374  0.03227739 -0.01697105 -0.00599859
   -0.02635252 -0.02392241 -0.03559051  0.0310891

In [14]:
# Show the padded index sequence of the first sentence.
embedded_docs[0]

array([   0,    0,    0,    0, 7220, 7581, 9970, 6253])

In [15]:
# Embedding vectors for the first sentence only: one row per position,
# where the leading rows correspond to the padding index 0.
first_doc_vectors = model.predict(embedded_docs)[0]
print(first_doc_vectors)

[[ 0.03147743  0.02656661 -0.03907777  0.00538928 -0.01277488  0.0457696
  -0.04285074  0.02408946 -0.03543677  0.02478757]
 [ 0.03147743  0.02656661 -0.03907777  0.00538928 -0.01277488  0.0457696
  -0.04285074  0.02408946 -0.03543677  0.02478757]
 [ 0.03147743  0.02656661 -0.03907777  0.00538928 -0.01277488  0.0457696
  -0.04285074  0.02408946 -0.03543677  0.02478757]
 [ 0.03147743  0.02656661 -0.03907777  0.00538928 -0.01277488  0.0457696
  -0.04285074  0.02408946 -0.03543677  0.02478757]
 [ 0.0318025  -0.03486433  0.04381151 -0.02800925 -0.02216991 -0.00454361
   0.00937456  0.00530289 -0.00389381  0.00465148]
 [ 0.00181343  0.03320623  0.02271677 -0.00243205 -0.0275746   0.02081
  -0.04397174  0.00361282  0.03117498  0.04876057]
 [-0.00346177 -0.03132806  0.02340728  0.04147964 -0.04607162 -0.04735309
   0.02422732 -0.03555065  0.01558931  0.03751099]
 [ 0.04034307  0.01720374  0.03227739 -0.01697105 -0.00599859 -0.02635252
  -0.02392241 -0.03559051  0.03108915  0.01745334]]
