In [1]:
import warnings

import tensorflow as tf

# Blanket filter: hides every warning category for the rest of the session,
# including deprecation notices raised by TensorFlow/Keras.
warnings.filterwarnings("ignore")

# Record the TensorFlow version this notebook was executed with.
print(tf.__version__)

2.19.0


In [2]:
from tensorflow.keras.preprocessing.text import one_hot

In [3]:
# Toy corpus: five short English sentences used as input for the encoding and
# embedding steps.
# NOTE(review): the third sentence contains an unclosed "(" -- kept verbatim.
sentences = [
    'The Times is a British daily national newspaper based in London',
    'It began in 1785 under the title The Daily Universal Register',
    'The Times and its sister paper The Sunday Times (founded in 1821',
    'The Times had an average daily circulation of 365,880 in March 2020',
    'The Times was founded by publisher John Walter',
]

In [4]:
# Vocabulary size: passed to one_hot() as the size of the index space and to
# the Embedding layer as input_dim.
voc_size = 1_000

In [5]:
# Encode each sentence as a list of integer word indices, one list per sentence,
# by hashing its words into the voc_size index space.
onehot_rep = []
for sentence in sentences:
    onehot_rep.append(one_hot(sentence, voc_size))
onehot_rep

[[990, 672, 847, 155, 568, 931, 978, 809, 352, 729, 717],
 [141, 759, 729, 345, 806, 990, 13, 990, 931, 501, 646],
 [990, 672, 157, 676, 885, 213, 990, 630, 672, 638, 729, 794],
 [990, 672, 924, 932, 458, 931, 746, 410, 413, 175, 729, 871, 255],
 [990, 672, 601, 638, 417, 130, 328, 260]]

In [6]:
import numpy as np
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

In [7]:
# Every encoded sentence is forced to exactly this many token ids.
sent_length = 8

# 'pre' on both options: longer sentences lose their leading tokens (this is the
# library default, spelled out here), shorter ones would be padded on the left.
embedded_docs = pad_sequences(
    onehot_rep,
    maxlen=sent_length,
    padding='pre',
    truncating='pre',
)
embedded_docs

array([[155, 568, 931, 978, 809, 352, 729, 717],
       [345, 806, 990,  13, 990, 931, 501, 646],
       [885, 213, 990, 630, 672, 638, 729, 794],
       [931, 746, 410, 413, 175, 729, 871, 255],
       [990, 672, 601, 638, 417, 130, 328, 260]], dtype=int32)

In [8]:
# Length of the dense vector the Embedding layer produces for each token id
# (used as its output_dim).
embedding_dim = 10

In [9]:
# Seed Python, NumPy and TensorFlow so the randomly initialised embedding
# weights -- and therefore every prediction made with this untrained model --
# are identical on each Restart & Run All.
tf.keras.utils.set_random_seed(42)

# A single Embedding layer: maps each integer id in [0, voc_size) to a
# trainable vector of length embedding_dim.
# The deprecated `input_length` argument is intentionally not passed: Keras 3
# only emits a deprecation warning for it, and the sequence length is already
# declared through build() below.
model = Sequential()
model.add(Embedding(input_dim=voc_size, output_dim=embedding_dim))
model.build(input_shape=(None, sent_length))

# Compiled as in the original notebook; no training step is shown here, so the
# optimizer and loss are not exercised by the predict() calls.
model.compile(optimizer='adam', loss='mse')

In [10]:
# Print Keras' textual overview of the model built above.
model.summary()

In [11]:
# Run all padded sentences through the embedding layer, then print the
# resulting array of per-token vectors.
doc_embeddings = model.predict(embedded_docs)
print(doc_embeddings)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step
[[[ 0.04325541  0.01946891 -0.01740294 -0.03229322  0.00364171
    0.0213053  -0.03694851 -0.01224737 -0.01398756 -0.00712992]
  [-0.03957983  0.04761181 -0.01531913 -0.02063644  0.01687035
   -0.0292887  -0.00930594  0.02249769 -0.02285295 -0.02039111]
  [-0.00421754  0.01728946 -0.0442047  -0.03719028 -0.04376518
    0.03940269 -0.03624731 -0.00757959 -0.03815645  0.02725774]
  [-0.03683973 -0.04675354 -0.03247069  0.00749434  0.03801973
    0.03014808  0.04994521 -0.02409705 -0.00486051 -0.01437543]
  [ 0.04053639  0.01954657  0.02346537 -0.00622616  0.02193877
   -0.02013962  0.03304308 -0.00286102 -0.01161013  0.01379745]
  [-0.01861763  0.0125181   0.02809394 -0.01598753  0.02573248
   -0.03007754 -0.02684621 -0.02917262  0.04421413 -0.04005098]
  [ 0.01191729 -0.02785038 -0.03114674 -0.03251795  0.01860181
   -0.01372131  0.04407411 -0.0050614  -0.0167804   0.04503966]
  [-0.04627556  0.02952795  0.01862793

In [12]:
# Show the first padded sentence: the row of token ids used for the
# single-sentence prediction that follows.
embedded_docs[0]

array([155, 568, 931, 978, 809, 352, 729, 717], dtype=int32)

In [13]:
# Embed only the first sentence; the [:1] slice keeps the leading batch axis,
# so the model still receives a 2-D (1, sent_length) array.
model.predict(embedded_docs[:1])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step


array([[[ 0.04325541,  0.01946891, -0.01740294, -0.03229322,
          0.00364171,  0.0213053 , -0.03694851, -0.01224737,
         -0.01398756, -0.00712992],
        [-0.03957983,  0.04761181, -0.01531913, -0.02063644,
          0.01687035, -0.0292887 , -0.00930594,  0.02249769,
         -0.02285295, -0.02039111],
        [-0.00421754,  0.01728946, -0.0442047 , -0.03719028,
         -0.04376518,  0.03940269, -0.03624731, -0.00757959,
         -0.03815645,  0.02725774],
        [-0.03683973, -0.04675354, -0.03247069,  0.00749434,
          0.03801973,  0.03014808,  0.04994521, -0.02409705,
         -0.00486051, -0.01437543],
        [ 0.04053639,  0.01954657,  0.02346537, -0.00622616,
          0.02193877, -0.02013962,  0.03304308, -0.00286102,
         -0.01161013,  0.01379745],
        [-0.01861763,  0.0125181 ,  0.02809394, -0.01598753,
          0.02573248, -0.03007754, -0.02684621, -0.02917262,
          0.04421413, -0.04005098],
        [ 0.01191729, -0.02785038, -0.03114674, -0.0