In [1]:
!pip install tensorflow



In [2]:
import tensorflow as tf


In [3]:
# Confirm which TensorFlow release the kernel actually imported.
print(tf.__version__)

2.12.0


In [4]:
##tensorflow >2.0
from tensorflow.keras.preprocessing.text import one_hot      ##Apply One Hot encoding for the preprocessing of the text

In [5]:
# Toy corpus: seven short sentences used to demonstrate the encoding steps.
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good',
]

In [6]:
# Echo the list to verify the sentences were stored as expected.
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [7]:
# Vocabulary size: treat the vocabulary as holding at most 500 distinct words.
voc_size = 500

In [8]:
# Print each sentence of the corpus on its own line.
for sentence in sent:
    print(sentence)

the glass of milk
the glass of juice
the cup of tea
I am a good boy
I am a good developer
understand the meaning of words
your videos are good


In [9]:
# One-hot representation: turn every sentence into a list of integer word
# indices (one index per word), each index drawn from the voc_size range.
# NOTE(review): per the Keras docs one_hot hashes words with Python's built-in
# hash by default, so the indices may differ between kernel sessions and two
# words can collide on one index -- confirm if reproducibility matters.
onehot_repr = [one_hot(sentence, voc_size) for sentence in sent]
print(onehot_repr)

[[314, 294, 67, 124], [314, 294, 67, 116], [314, 494, 67, 451], [77, 136, 10, 70, 88], [77, 136, 10, 70, 258], [455, 314, 209, 67, 177], [193, 36, 323, 70]]


In [10]:
##Word Embedding Representation
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [11]:
import numpy as np

In [12]:
# Pre-padding: left-fill every index sequence with zeros so that all sentences
# share one fixed length, which the network needs for uniform input.
sent_length = 8  # target length of every padded sequence
embedded_docs = pad_sequences(
    onehot_repr,
    maxlen=sent_length,
    padding='pre',
)
print(embedded_docs)  # one row of sent_length indices per sentence

[[  0   0   0   0 314 294  67 124]
 [  0   0   0   0 314 294  67 116]
 [  0   0   0   0 314 494  67 451]
 [  0   0   0  77 136  10  70  88]
 [  0   0   0  77 136  10  70 258]
 [  0   0   0 455 314 209  67 177]
 [  0   0   0   0 193  36 323  70]]


In [13]:
# Feature dimension: every word is represented by a vector of 10 values.
dim = 10

In [14]:
# Build a model that consists of a single Embedding layer.
model = Sequential()
# Embedding(vocabulary size, vector size per word, tokens per input sequence).
# The vector size comes from `dim` instead of a repeated literal 10, so the
# feature dimension is configured in exactly one place.
model.add(Embedding(voc_size, dim, input_length=sent_length))
# Compile with the 'adam' optimizer and 'mse' (mean squared error) loss.
# No fit() call appears in the cells shown, so predictions made with this
# model come from the layer's initial weights.
model.compile('adam', 'mse')

In [15]:
# Show the layer stack, output shapes and parameter counts
# (the embedding table holds voc_size * 10 = 5,000 weights).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 8, 10)             5000      
                                                                 
Total params: 5,000
Trainable params: 5,000
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Padded index sequence of the first sentence, 'the glass of milk'.
embedded_docs[0]

array([  0,   0,   0,   0, 314, 294,  67, 124], dtype=int32)

In [17]:
# Embed only the first sentence: each of its 8 token positions becomes a
# vector of 10 values. predict() expects a batch, so keep the batch axis with
# a length-1 slice and then take the single result; passing the bare 1-D row
# would be treated as 8 separate one-token samples instead of one sentence.
model.predict(embedded_docs[:1])[0]



array([[ 0.01090016, -0.03226825, -0.0268811 ,  0.01974727, -0.04785923,
         0.01987359,  0.03860131, -0.01374207, -0.03794643, -0.00306908],
       [ 0.01090016, -0.03226825, -0.0268811 ,  0.01974727, -0.04785923,
         0.01987359,  0.03860131, -0.01374207, -0.03794643, -0.00306908],
       [ 0.01090016, -0.03226825, -0.0268811 ,  0.01974727, -0.04785923,
         0.01987359,  0.03860131, -0.01374207, -0.03794643, -0.00306908],
       [ 0.01090016, -0.03226825, -0.0268811 ,  0.01974727, -0.04785923,
         0.01987359,  0.03860131, -0.01374207, -0.03794643, -0.00306908],
       [ 0.03780985, -0.04307367,  0.00915509,  0.00639266, -0.03835129,
        -0.0331706 ,  0.02021641, -0.00455648,  0.01315368,  0.01438601],
       [ 0.02188213, -0.01414035,  0.04393785,  0.03501965,  0.01522707,
        -0.04994918,  0.00272436,  0.03259507,  0.02230502, -0.00084081],
       [ 0.03392893, -0.00923955, -0.03165269, -0.00986107,  0.00274793,
        -0.04299071, -0.03793531, -0.00665325

In [18]:
# Embed every padded sentence in one batch; the result holds one
# (sent_length, 10) matrix per sentence.
model.predict(embedded_docs)



array([[[ 0.01090016, -0.03226825, -0.0268811 ,  0.01974727,
         -0.04785923,  0.01987359,  0.03860131, -0.01374207,
         -0.03794643, -0.00306908],
        [ 0.01090016, -0.03226825, -0.0268811 ,  0.01974727,
         -0.04785923,  0.01987359,  0.03860131, -0.01374207,
         -0.03794643, -0.00306908],
        [ 0.01090016, -0.03226825, -0.0268811 ,  0.01974727,
         -0.04785923,  0.01987359,  0.03860131, -0.01374207,
         -0.03794643, -0.00306908],
        [ 0.01090016, -0.03226825, -0.0268811 ,  0.01974727,
         -0.04785923,  0.01987359,  0.03860131, -0.01374207,
         -0.03794643, -0.00306908],
        [ 0.03780985, -0.04307367,  0.00915509,  0.00639266,
         -0.03835129, -0.0331706 ,  0.02021641, -0.00455648,
          0.01315368,  0.01438601],
        [ 0.02188213, -0.01414035,  0.04393785,  0.03501965,
          0.01522707, -0.04994918,  0.00272436,  0.03259507,
          0.02230502, -0.00084081],
        [ 0.03392893, -0.00923955, -0.03165269, -0.0

In [19]:
# Index sequence of the first sentence, shown again for comparison with its
# embedding matrix printed in the next cell.
embedded_docs[0]

array([  0,   0,   0,   0, 314, 294,  67, 124], dtype=int32)

In [20]:
# Print only the first sentence's embedding matrix: one row of 10 values per
# token position; the leading padding positions (index 0) share the same row.
print(model.predict(embedded_docs)[0])

[[ 0.01090016 -0.03226825 -0.0268811   0.01974727 -0.04785923  0.01987359
   0.03860131 -0.01374207 -0.03794643 -0.00306908]
 [ 0.01090016 -0.03226825 -0.0268811   0.01974727 -0.04785923  0.01987359
   0.03860131 -0.01374207 -0.03794643 -0.00306908]
 [ 0.01090016 -0.03226825 -0.0268811   0.01974727 -0.04785923  0.01987359
   0.03860131 -0.01374207 -0.03794643 -0.00306908]
 [ 0.01090016 -0.03226825 -0.0268811   0.01974727 -0.04785923  0.01987359
   0.03860131 -0.01374207 -0.03794643 -0.00306908]
 [ 0.03780985 -0.04307367  0.00915509  0.00639266 -0.03835129 -0.0331706
   0.02021641 -0.00455648  0.01315368  0.01438601]
 [ 0.02188213 -0.01414035  0.04393785  0.03501965  0.01522707 -0.04994918
   0.00272436  0.03259507  0.02230502 -0.00084081]
 [ 0.03392893 -0.00923955 -0.03165269 -0.00986107  0.00274793 -0.04299071
  -0.03793531 -0.00665325  0.02258784  0.04462271]
 [-0.0449338  -0.04202926  0.01338762  0.04850719 -0.03011226 -0.00847449
   0.00443894 -0.00841982  0.03021793  0.02249861]]
