# Word Embedding technique using Keras 

In [1]:
# Libraries used are TensorFlow and Keras

In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [11]:
# Sample sentences: the small corpus that the following cells encode and embed
sent = [
    'I will have to work overtime every day next week.',
    'I have to go to work.',
    'He was going to like the clothes she bought for the trip.',
    'We will just wait for her.',
]

In [12]:
# Echo the sample sentences so the list contents are visible in the notebook output
sent

['I will have to work overtime every day next week.',
 'I have to go to work.',
 'He was going to like the clothes she bought for the trip.',
 'We will just wait for her.']

In [13]:
# Parameters
# Vocabulary size: passed to one_hot() when encoding the sentences and used as
# the input dimension of the Embedding layer.
VOC_SIZE = 1000

### One hot Representation

In [14]:
# Encode every sentence as a list of integer word indices (one integer per word).
# Repeated words receive the same index, as the printed result shows.
oneHot_Repr = [
    one_hot(sentence, VOC_SIZE)
    for sentence in sent
]
print(oneHot_Repr)

[[992, 874, 776, 715, 410, 327, 233, 912, 837, 926], [992, 776, 715, 747, 715, 410], [822, 755, 182, 715, 981, 662, 671, 399, 166, 630, 662, 894], [121, 874, 871, 204, 630, 257]]


### Word Embedding Representation

In [15]:
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
# pad_sequences brings every sentence to the same length
# by adding 0s
from tensorflow.keras.models import Sequential

In [16]:
# Fixed length that every encoded sentence is padded to
SENT_LENGTH = 15

# padding='pre' puts the 0s in front of the word indices, so each row of the
# printed array ends with the sentence's own indices.
padded_sent = pad_sequences(
    oneHot_Repr,
    maxlen=SENT_LENGTH,
    padding='pre',
)
print(padded_sent)

[[  0   0   0   0   0 992 874 776 715 410 327 233 912 837 926]
 [  0   0   0   0   0   0   0   0   0 992 776 715 747 715 410]
 [  0   0   0 822 755 182 715 981 662 671 399 166 630 662 894]
 [  0   0   0   0   0   0   0   0   0 121 874 871 204 630 257]]


In [31]:
# Number of features in each word's embedding vector.
# The original cell defined DIM=5 but never used it and hard-coded 10 in the
# Embedding call; DIM now holds the value actually used (10), which matches the
# recorded model summary: output shape (None, 15, 10) and 10,000 parameters.
DIM=10

# To proceed further we need to create a model: a single Embedding layer that
# maps each of the SENT_LENGTH word indices of a sentence to a DIM-dimensional vector.
model=Sequential()
model.add(Embedding(VOC_SIZE,DIM,input_length=SENT_LENGTH))
# No training step appears in the cells shown here, so the predictions below
# presumably come from the layer's initial weights.
model.compile(optimizer='adam',loss='mse')
 

In [32]:
# Print the layer table: output shape and parameter count of the Embedding layer
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_3 (Embedding)     (None, 15, 10)            10000     
                                                                 
Total params: 10,000
Trainable params: 10,000
Non-trainable params: 0
_________________________________________________________________


In [33]:
# Run all padded sentences through the model and print the embedding vectors
# produced for every position of every sentence (this output is large).
print(model.predict(padded_sent))

[[[-3.94658446e-02  1.31381266e-02  1.01696029e-02  1.08289123e-02
    3.35750245e-02  4.54565398e-02 -4.39370796e-03  4.18706052e-02
   -3.31837088e-02 -4.37119119e-02]
  [-3.94658446e-02  1.31381266e-02  1.01696029e-02  1.08289123e-02
    3.35750245e-02  4.54565398e-02 -4.39370796e-03  4.18706052e-02
   -3.31837088e-02 -4.37119119e-02]
  [-3.94658446e-02  1.31381266e-02  1.01696029e-02  1.08289123e-02
    3.35750245e-02  4.54565398e-02 -4.39370796e-03  4.18706052e-02
   -3.31837088e-02 -4.37119119e-02]
  [-3.94658446e-02  1.31381266e-02  1.01696029e-02  1.08289123e-02
    3.35750245e-02  4.54565398e-02 -4.39370796e-03  4.18706052e-02
   -3.31837088e-02 -4.37119119e-02]
  [-3.94658446e-02  1.31381266e-02  1.01696029e-02  1.08289123e-02
    3.35750245e-02  4.54565398e-02 -4.39370796e-03  4.18706052e-02
   -3.31837088e-02 -4.37119119e-02]
  [-2.02918295e-02  2.62456015e-03 -1.62682049e-02  3.46190669e-02
   -3.33103314e-02  2.05795504e-02 -8.18474218e-03 -4.20297757e-02
   -2.58183368e-

In [34]:
# Inspect the padded index sequence of the first sentence (leading 0s are padding)
padded_sent[0]

array([  0,   0,   0,   0,   0, 992, 874, 776, 715, 410, 327, 233, 912,
       837, 926])

In [35]:
# Print the embedding vectors of the first sentence only: one row per position.
# Positions holding the padding index 0 all map to the same vector.
print(model.predict(padded_sent)[0])

[[-3.9465845e-02  1.3138127e-02  1.0169603e-02  1.0828912e-02
   3.3575024e-02  4.5456540e-02 -4.3937080e-03  4.1870605e-02
  -3.3183709e-02 -4.3711912e-02]
 [-3.9465845e-02  1.3138127e-02  1.0169603e-02  1.0828912e-02
   3.3575024e-02  4.5456540e-02 -4.3937080e-03  4.1870605e-02
  -3.3183709e-02 -4.3711912e-02]
 [-3.9465845e-02  1.3138127e-02  1.0169603e-02  1.0828912e-02
   3.3575024e-02  4.5456540e-02 -4.3937080e-03  4.1870605e-02
  -3.3183709e-02 -4.3711912e-02]
 [-3.9465845e-02  1.3138127e-02  1.0169603e-02  1.0828912e-02
   3.3575024e-02  4.5456540e-02 -4.3937080e-03  4.1870605e-02
  -3.3183709e-02 -4.3711912e-02]
 [-3.9465845e-02  1.3138127e-02  1.0169603e-02  1.0828912e-02
   3.3575024e-02  4.5456540e-02 -4.3937080e-03  4.1870605e-02
  -3.3183709e-02 -4.3711912e-02]
 [-2.0291829e-02  2.6245601e-03 -1.6268205e-02  3.4619067e-02
  -3.3310331e-02  2.0579550e-02 -8.1847422e-03 -4.2029776e-02
  -2.5818337e-02  3.6867622e-02]
 [-1.3679564e-02 -3.3114314e-02 -2.0552909e-02 -5.1007755e