# Word Embedding Techniques using Embedding Layer in Keras

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:

## Requires tensorflow > 2.0
from tensorflow.keras.preprocessing.text import one_hot

### Sentences

In [3]:

# Toy corpus: seven short sentences used as input for the encoding steps below.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [4]:
# Echo the corpus to confirm its contents.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [5]:
# Vocabulary size: upper bound for the integer index assigned to each word.
voc_size = 10000

### One Hot Representation

In [6]:
# Encode every sentence as a list of integer word indices below voc_size.
# NOTE(review): per the Keras docs one_hot is hash-based, so distinct words may
# collide and the indices may differ between Python sessions — confirm.
onehot_repr = [
    one_hot(sentence, voc_size)
    for sentence in sent
]
print(onehot_repr)

[[6950, 1711, 7649, 8265], [6950, 1711, 7649, 1538], [6950, 86, 7649, 9128], [2409, 3234, 2370, 3314, 7179], [2409, 3234, 2370, 3314, 8832], [7140, 6950, 1028, 7649, 7814], [5988, 5571, 6663, 3314]]


## Word Embedding Representation

In [7]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences   
from tensorflow.keras.models import Sequential

####  pad_sequences - all sentences should have the same number of words, so that the encoded documents form a regular (rectangular) matrix

In [8]:
# Left-pad ('pre') each index sequence with zeros to a fixed length of 8 tokens.
sent_length = 8
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)

[[   0    0    0    0 6950 1711 7649 8265]
 [   0    0    0    0 6950 1711 7649 1538]
 [   0    0    0    0 6950   86 7649 9128]
 [   0    0    0 2409 3234 2370 3314 7179]
 [   0    0    0 2409 3234 2370 3314 8832]
 [   0    0    0 7140 6950 1028 7649 7814]
 [   0    0    0    0 5988 5571 6663 3314]]


In [9]:
# Number of features in each word-embedding vector.
dim = 10

In [10]:

# Build a single-layer model that maps each word index to a dense vector.
# Embedding(input_dim, output_dim): voc_size rows in the lookup table and
# `dim` features per word. Using `dim` (defined in the previous cell) instead
# of a hard-coded 10 keeps the layer in sync with that setting.
# NOTE(review): `input_length` is reported as deprecated in Keras 3 — confirm
# against the installed TensorFlow/Keras version before removing it.
model = Sequential()
model.add(Embedding(voc_size, dim, input_length=sent_length))
# Optimizer/loss are supplied only to compile the model; no `fit` call is made
# in the cells shown here, so the embeddings stay at their initial values.
model.compile('adam', 'mse')


In [11]:

# Print the layer table; the embedding holds voc_size * 10 = 100,000 weights.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [12]:
# Look up the embedding vector of every token in every padded sentence.
all_doc_vectors = model.predict(embedded_docs)
print(all_doc_vectors)

[[[ 0.02760107  0.03994007  0.03337399 -0.00675526  0.02366166
   -0.04707533  0.02997899  0.01246329  0.01414538  0.03298087]
  [ 0.02760107  0.03994007  0.03337399 -0.00675526  0.02366166
   -0.04707533  0.02997899  0.01246329  0.01414538  0.03298087]
  [ 0.02760107  0.03994007  0.03337399 -0.00675526  0.02366166
   -0.04707533  0.02997899  0.01246329  0.01414538  0.03298087]
  [ 0.02760107  0.03994007  0.03337399 -0.00675526  0.02366166
   -0.04707533  0.02997899  0.01246329  0.01414538  0.03298087]
  [ 0.02843882 -0.00164281  0.04216808 -0.00610459  0.03803358
    0.04422314 -0.03188549  0.02125886  0.01266861  0.01727979]
  [-0.00143244  0.01807064  0.01897315  0.0049146   0.01374686
   -0.0371105   0.0068697   0.01262089  0.00196812 -0.0110807 ]
  [ 0.04820624 -0.04688386 -0.02245374 -0.02365856 -0.03172921
   -0.01730746 -0.02879286  0.01833005  0.04632426 -0.01484304]
  [-0.00599844 -0.0422464  -0.010943   -0.03232906 -0.02712141
    0.0250338   0.02309319  0.00533264  0.026129

In [13]:
# Padded index sequence of the first sentence.
embedded_docs[0]

array([   0,    0,    0,    0, 6950, 1711, 7649, 8265])

In [14]:

# Embedding vectors for the first sentence only: one row per token position.
first_doc_vectors = model.predict(embedded_docs)[0]
print(first_doc_vectors)

[[ 0.02760107  0.03994007  0.03337399 -0.00675526  0.02366166 -0.04707533
   0.02997899  0.01246329  0.01414538  0.03298087]
 [ 0.02760107  0.03994007  0.03337399 -0.00675526  0.02366166 -0.04707533
   0.02997899  0.01246329  0.01414538  0.03298087]
 [ 0.02760107  0.03994007  0.03337399 -0.00675526  0.02366166 -0.04707533
   0.02997899  0.01246329  0.01414538  0.03298087]
 [ 0.02760107  0.03994007  0.03337399 -0.00675526  0.02366166 -0.04707533
   0.02997899  0.01246329  0.01414538  0.03298087]
 [ 0.02843882 -0.00164281  0.04216808 -0.00610459  0.03803358  0.04422314
  -0.03188549  0.02125886  0.01266861  0.01727979]
 [-0.00143244  0.01807064  0.01897315  0.0049146   0.01374686 -0.0371105
   0.0068697   0.01262089  0.00196812 -0.0110807 ]
 [ 0.04820624 -0.04688386 -0.02245374 -0.02365856 -0.03172921 -0.01730746
  -0.02879286  0.01833005  0.04632426 -0.01484304]
 [-0.00599844 -0.0422464  -0.010943   -0.03232906 -0.02712141  0.0250338
   0.02309319  0.00533264  0.0261297  -0.00653768]]


# These are the basics of Embedding layers