In [47]:
# Import required libraries
from tensorflow.keras.layers import Embedding
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import one_hot

In [49]:
# Sentences
sent = [
    'the glass of milk',
    'the glass of juice',
    'the cup of tea',
    'I am a good boy',
    'I am a good developer',
    'understand the meaning of words',
    'your videos are good'
]
# Vocabulary size
voc_size = 500
sent

['the glass of milk',
 'the glass of juice',
 'the cup of tea',
 'I am a good boy',
 'I am a good developer',
 'understand the meaning of words',
 'your videos are good']

In [50]:
# One-hot representation of the sentences
onehot_repr = [one_hot(words, voc_size) for words in sent]
print("One-hot representation:\n", onehot_repr)

One-hot representation:
 [[278, 274, 220, 382], [278, 274, 220, 80], [278, 321, 220, 430], [167, 102, 497, 308, 489], [167, 102, 497, 308, 84], [286, 278, 56, 220, 7], [382, 361, 285, 308]]


In [51]:
# Define the maximum sentence length and pad sequences
sent_length = 8
embedded_docs = pad_sequences(onehot_repr, padding='pre', maxlen=sent_length)
print("\nPadded sequences:\n", embedded_docs)



Padded sequences:
 [[  0   0   0   0 278 274 220 382]
 [  0   0   0   0 278 274 220  80]
 [  0   0   0   0 278 321 220 430]
 [  0   0   0 167 102 497 308 489]
 [  0   0   0 167 102 497 308  84]
 [  0   0   0 286 278  56 220   7]
 [  0   0   0   0 382 361 285 308]]


In [52]:
# Define the Sequential model with an Embedding layer
model = Sequential()
model.add(Embedding(input_dim=voc_size, output_dim=10, input_length=sent_length))
model.compile('adam', 'mse')
model.summary()  # Display model architecture




In [53]:
# Predict the output using the model
output = model.predict(embedded_docs)
print("\nModel Output Shape:", output.shape)
print("\nModel Output:\n", output)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 99ms/step

Model Output Shape: (7, 8, 10)

Model Output:
 [[[ 0.01648333  0.03956585 -0.01311998  0.02124352  0.04993966
    0.01981818  0.01894465  0.02243893  0.02272158 -0.00491359]
  [ 0.01648333  0.03956585 -0.01311998  0.02124352  0.04993966
    0.01981818  0.01894465  0.02243893  0.02272158 -0.00491359]
  [ 0.01648333  0.03956585 -0.01311998  0.02124352  0.04993966
    0.01981818  0.01894465  0.02243893  0.02272158 -0.00491359]
  [ 0.01648333  0.03956585 -0.01311998  0.02124352  0.04993966
    0.01981818  0.01894465  0.02243893  0.02272158 -0.00491359]
  [-0.03579132 -0.03898908  0.01086549  0.02517673 -0.01184323
    0.00940056  0.03689772  0.00653332  0.00696944  0.0252238 ]
  [-0.03988162  0.00877814  0.03086667  0.01188736 -0.04698151
   -0.01752643  0.02207167 -0.04783504 -0.02332926 -0.04997479]
  [ 0.03994802 -0.01654726  0.00330427 -0.03984598  0.01634902
   -0.03451488  0.02185005 -0.01026659 -0.03302959 -0.0

In [54]:
### Assignment

sent=["The world is a better place",
      "Marvel series is my favourite movie",
      "I like DC movies",
      "the cat is eating the food",
      "Tom and Jerry is my favourite movie",
      "Python is my favourite programming language"
      ]

In [57]:
vocab_size=1000

In [58]:
onehot_repr = [one_hot(words, voc_size) for words in sent]
print("One-hot representation:\n", onehot_repr)

One-hot representation:
 [[278, 60, 262, 497, 233, 78], [207, 304, 262, 199, 68, 403], [167, 492, 344, 254], [278, 182, 262, 99, 278, 495], [127, 301, 229, 262, 199, 68, 403], [143, 262, 199, 68, 459, 372]]


In [59]:
sent_length = 10
embedded_docs = pad_sequences(onehot_repr, padding='pre', maxlen=sent_length)
print("\nPadded sequences:\n", embedded_docs)


Padded sequences:
 [[  0   0   0   0 278  60 262 497 233  78]
 [  0   0   0   0 207 304 262 199  68 403]
 [  0   0   0   0   0   0 167 492 344 254]
 [  0   0   0   0 278 182 262  99 278 495]
 [  0   0   0 127 301 229 262 199  68 403]
 [  0   0   0   0 143 262 199  68 459 372]]


In [60]:
model = Sequential()
model.add(Embedding(input_dim=voc_size, output_dim=10, input_length=sent_length))
model.compile('adam', 'mse')
model.summary()



In [61]:
output = model.predict(embedded_docs)
print("\nModel Output Shape:", output.shape)
print("\nModel Output:\n", output)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step

Model Output Shape: (6, 10, 10)

Model Output:
 [[[ 3.71217728e-03  3.42678465e-02 -2.76594888e-02 -1.04963891e-02
    2.32547037e-02  2.18410157e-02 -1.97427515e-02 -3.72750051e-02
    3.83162387e-02 -4.60077450e-03]
  [ 3.71217728e-03  3.42678465e-02 -2.76594888e-02 -1.04963891e-02
    2.32547037e-02  2.18410157e-02 -1.97427515e-02 -3.72750051e-02
    3.83162387e-02 -4.60077450e-03]
  [ 3.71217728e-03  3.42678465e-02 -2.76594888e-02 -1.04963891e-02
    2.32547037e-02  2.18410157e-02 -1.97427515e-02 -3.72750051e-02
    3.83162387e-02 -4.60077450e-03]
  [ 3.71217728e-03  3.42678465e-02 -2.76594888e-02 -1.04963891e-02
    2.32547037e-02  2.18410157e-02 -1.97427515e-02 -3.72750051e-02
    3.83162387e-02 -4.60077450e-03]
  [ 7.82706589e-03  4.34530415e-02  4.50358875e-02  4.76084463e-02
    1.04049072e-02 -3.31107005e-02 -2.67907511e-02  2.51978748e-02
    1.46827586e-02  3.04302238e-02]
  [-2.48670112e-02  2.976873