### **Word Embedding Techniques using Embedding Layer in Keras**

https://machinelearningmastery.com/use-word-embedding-layers-deep-learning-keras/

In [1]:
### Libraries used: TensorFlow (> 2.0) and Keras

from tensorflow.keras.preprocessing.text import one_hot

In [2]:
### Example sentences used as the toy corpus for the embedding demo
sent = [
    "the glass of milk",
    "the glass of juice",
    "the cup of tea",
    "I am a good boy",
    "I am a good developer",
    "understand the meaning of words",
    "your videos are good",
]

In [3]:
# Echo the sentence list so the notebook shows the corpus being encoded.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [4]:
### Vocabulary size: passed to one_hot() as the index range and to the
### Embedding layer as its number of rows.
voc_size = 10_000

#### **One Hot Representation**

In [5]:
# Encode each sentence as a list of integer word indices. Despite the
# function's name, the result is one integer per word (see the printed
# output), not a one-hot vector.
onehot_repr = [one_hot(sentence, voc_size) for sentence in sent]
print(onehot_repr)

[[6311, 8680, 9955, 1943], [6311, 8680, 9955, 8484], [6311, 4159, 9955, 4247], [2232, 6742, 1031, 863, 9198], [2232, 6742, 1031, 863, 350], [3716, 6311, 3582, 9955, 4862], [3317, 7307, 2381, 863]]


#### **Word Embedding Representation**

In [6]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
import numpy as np

In [7]:
# Fixed sequence length for every sentence fed to the Embedding layer.
sent_length = 8

# padding='pre' places the zeros in front of each sequence (see the printed
# output), so every row ends up exactly sent_length entries long.
embedded_docs = pad_sequences(onehot_repr, maxlen=sent_length, padding='pre')
print(embedded_docs)

[[   0    0    0    0 6311 8680 9955 1943]
 [   0    0    0    0 6311 8680 9955 8484]
 [   0    0    0    0 6311 4159 9955 4247]
 [   0    0    0 2232 6742 1031  863 9198]
 [   0    0    0 2232 6742 1031  863  350]
 [   0    0    0 3716 6311 3582 9955 4862]
 [   0    0    0    0 3317 7307 2381  863]]


In [8]:
# Size of the dense vector learned for each word index.
dim = 10

# Single-layer model: maps each of the sent_length integer indices in a
# padded sentence to a dim-sized vector, i.e. output shape
# (batch, sent_length, dim).
model = Sequential()
# Use `dim` rather than a repeated literal so the embedding size is
# controlled from one place.
# NOTE(review): `input_length` is reported as deprecated in Keras 3 -- confirm
# against the TensorFlow/Keras version this notebook targets.
model.add(Embedding(input_dim=voc_size, output_dim=dim, input_length=sent_length))
# Compiled with the Adam optimizer and MSE loss; no training step appears in
# the cells shown here.
model.compile('adam', 'mse')

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 8, 10)             100000    
Total params: 100,000
Trainable params: 100,000
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Run every padded sentence through the embedding layer. The model has not
# been fit in the cells shown, so these are presumably the layer's initial
# weights. Equal indices map to equal vectors -- the leading padding zeros
# of a sentence all print the same row.
all_doc_vectors = model.predict(embedded_docs)
print(all_doc_vectors)

[[[-7.2735921e-03 -2.0491814e-02  4.0273730e-02  1.4584351e-02
   -5.8905594e-03  4.0082667e-02  1.4630485e-02  4.6288002e-02
    3.1976882e-02  2.2828627e-02]
  [-7.2735921e-03 -2.0491814e-02  4.0273730e-02  1.4584351e-02
   -5.8905594e-03  4.0082667e-02  1.4630485e-02  4.6288002e-02
    3.1976882e-02  2.2828627e-02]
  [-7.2735921e-03 -2.0491814e-02  4.0273730e-02  1.4584351e-02
   -5.8905594e-03  4.0082667e-02  1.4630485e-02  4.6288002e-02
    3.1976882e-02  2.2828627e-02]
  [-7.2735921e-03 -2.0491814e-02  4.0273730e-02  1.4584351e-02
   -5.8905594e-03  4.0082667e-02  1.4630485e-02  4.6288002e-02
    3.1976882e-02  2.2828627e-02]
  [ 4.8640791e-02 -1.1943638e-02 -2.0579839e-02 -1.3378657e-02
   -8.6760297e-03  2.9508162e-02 -4.6362139e-02 -4.0125560e-02
    3.8351864e-04 -1.2898840e-02]
  [-4.4798922e-02 -4.6502899e-02  1.4803421e-02  8.1813931e-03
   -3.2554328e-02  1.8876407e-02 -4.0257551e-02  4.9536992e-02
    4.7723558e-02  2.2956122e-02]
  [-4.0918611e-02 -6.1189756e-03  4.4681

In [10]:
# Padded index sequence of the first sentence, for comparison with its
# embedded vectors.
embedded_docs[0]

array([   0,    0,    0,    0, 6311, 8680, 9955, 1943], dtype=int32)

In [11]:
# Embedded vectors for the first sentence only: one row per position in the
# padded sequence, each row holding the vector for that position's index.
first_doc_vectors = model.predict(embedded_docs)[0]
print(first_doc_vectors)

[[-0.00727359 -0.02049181  0.04027373  0.01458435 -0.00589056  0.04008267
   0.01463049  0.046288    0.03197688  0.02282863]
 [-0.00727359 -0.02049181  0.04027373  0.01458435 -0.00589056  0.04008267
   0.01463049  0.046288    0.03197688  0.02282863]
 [-0.00727359 -0.02049181  0.04027373  0.01458435 -0.00589056  0.04008267
   0.01463049  0.046288    0.03197688  0.02282863]
 [-0.00727359 -0.02049181  0.04027373  0.01458435 -0.00589056  0.04008267
   0.01463049  0.046288    0.03197688  0.02282863]
 [ 0.04864079 -0.01194364 -0.02057984 -0.01337866 -0.00867603  0.02950816
  -0.04636214 -0.04012556  0.00038352 -0.01289884]
 [-0.04479892 -0.0465029   0.01480342  0.00818139 -0.03255433  0.01887641
  -0.04025755  0.04953699  0.04772356  0.02295612]
 [-0.04091861 -0.00611898  0.04468187  0.01677244 -0.04980707  0.01551295
   0.02273038  0.0444576   0.01563516  0.04448918]
 [ 0.0172242   0.04099942 -0.03609883  0.03186383 -0.01844973  0.01872576
   0.02748326 -0.01451508 -0.01448847 -0.04574871]]