In [1]:
# Toy corpus used throughout the notebook to practice word embeddings
sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "Artificial intelligence is transforming the world.",
    "Python is a versatile programming language.",
    "Machine learning models require a lot of data.",
    "Natural language processing is a fascinating field.",
    "Deep learning has revolutionized computer vision.",
    "Data science combines statistics and programming.",
    "Neural networks are inspired by the human brain.",
    "Big data analytics helps in making better decisions.",
]

# Echo the corpus, one sentence per line, numbered from 1
print(
    *(f"{i}: {sentence}" for i, sentence in enumerate(sentences, start=1)),
    sep="\n",
)

1: The quick brown fox jumps over the lazy dog.
2: Artificial intelligence is transforming the world.
3: Python is a versatile programming language.
4: Machine learning models require a lot of data.
5: Natural language processing is a fascinating field.
6: Deep learning has revolutionized computer vision.
7: Data science combines statistics and programming.
8: Neural networks are inspired by the human brain.
9: Big data analytics helps in making better decisions.


In [2]:
from tensorflow.keras.preprocessing.text import one_hot
# Size of the id space handed to `one_hot` (also reused by later cells)
vocabulary_size = 1000

# Encode the first sentence: despite its name, `one_hot` returns a list of
# integer word ids (a repeated word such as "the" gets the same id each time).
# NOTE(review): `one_hot` appears to rely on Python's built-in `hash` by default,
# so the ids may differ between kernel sessions — confirm before relying on them.
encoded_sentence = one_hot(
    sentences[0],
    vocabulary_size,
)
print(f"\nOne-hot encoded representation of the first sentence: {encoded_sentence}")



One-hot encoded representation of the first sentence: [892, 787, 484, 164, 464, 332, 892, 301, 258]


In [3]:
# Encode every sentence of the corpus into its list of integer word ids,
# using the same id space (`vocabulary_size`) for all of them.
one_hot_encoded_sentences = [
    one_hot(text, vocabulary_size)
    for text in sentences
]
one_hot_encoded_sentences

[[892, 787, 484, 164, 464, 332, 892, 301, 258],
 [267, 366, 808, 981, 892, 527],
 [703, 808, 274, 989, 686, 140],
 [241, 604, 708, 611, 274, 945, 600, 971],
 [859, 140, 800, 808, 274, 908, 93],
 [580, 604, 724, 41, 21, 604],
 [971, 996, 15, 846, 725, 686],
 [399, 87, 789, 145, 517, 892, 549, 477],
 [41, 971, 160, 803, 938, 505, 292, 286]]

In [4]:
from tensorflow.keras.layers import Embedding, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import pad_sequences
import numpy as np
# Every encoded sentence is brought to this common length
sent_length = 15

# Left-pad ('pre') the id lists with zeros so that all rows have
# `sent_length` entries and can be stacked into a single 2-D array.
embedding_docs = pad_sequences(
    one_hot_encoded_sentences,
    maxlen=sent_length,
    padding='pre',
)
embedding_docs


array([[  0,   0,   0,   0,   0,   0, 892, 787, 484, 164, 464, 332, 892,
        301, 258],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0, 267, 366, 808, 981,
        892, 527],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0, 703, 808, 274, 989,
        686, 140],
       [  0,   0,   0,   0,   0,   0,   0, 241, 604, 708, 611, 274, 945,
        600, 971],
       [  0,   0,   0,   0,   0,   0,   0,   0, 859, 140, 800, 808, 274,
        908,  93],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0, 580, 604, 724,  41,
         21, 604],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0, 971, 996,  15, 846,
        725, 686],
       [  0,   0,   0,   0,   0,   0,   0, 399,  87, 789, 145, 517, 892,
        549, 477],
       [  0,   0,   0,   0,   0,   0,   0,  41, 971, 160, 803, 938, 505,
        292, 286]])

In [5]:
# Padded id sequence of the first sentence (row 0, all columns)
first_row = embedding_docs[0, :]
first_row

array([  0,   0,   0,   0,   0,   0, 892, 787, 484, 164, 464, 332, 892,
       301, 258])

In [6]:
# Show each padded row on its own line, labelled with a 1-based sentence number
print("\n".join(
    f"Sentence {i+1} after padding: {sentence}"
    for i, sentence in enumerate(embedding_docs)
))

Sentence 1 after padding: [  0   0   0   0   0   0 892 787 484 164 464 332 892 301 258]
Sentence 2 after padding: [  0   0   0   0   0   0   0   0   0 267 366 808 981 892 527]
Sentence 3 after padding: [  0   0   0   0   0   0   0   0   0 703 808 274 989 686 140]
Sentence 4 after padding: [  0   0   0   0   0   0   0 241 604 708 611 274 945 600 971]
Sentence 5 after padding: [  0   0   0   0   0   0   0   0 859 140 800 808 274 908  93]
Sentence 6 after padding: [  0   0   0   0   0   0   0   0   0 580 604 724  41  21 604]
Sentence 7 after padding: [  0   0   0   0   0   0   0   0   0 971 996  15 846 725 686]
Sentence 8 after padding: [  0   0   0   0   0   0   0 399  87 789 145 517 892 549 477]
Sentence 9 after padding: [  0   0   0   0   0   0   0  41 971 160 803 938 505 292 286]


In [7]:
# Number of components in each learned word vector
dim = 10

# A model consisting of a single Embedding layer: it maps each integer id in
# [0, vocabulary_size) to a `dim`-dimensional vector.
model = Sequential()
# `input_length` is intentionally not passed: it is deprecated in Keras 3
# (it only produces a warning there). The sequence length is supplied through
# the explicit `build` call below instead.
model.add(Embedding(vocabulary_size, dim))
model.build(input_shape=(None, sent_length))  # Explicitly build the model
# No training happens in this notebook; compile only attaches an optimizer/loss.
model.compile(optimizer='adam', loss='mse')



In [8]:
# Print the layer-by-layer overview of the embedding model
model.summary()

In [9]:
# Look up the embedding vector of every token id in every padded sentence.
# NOTE(review): the model is never fitted in the visible cells, so these values
# are presumably just the layer's random initial weights — confirm.
model.predict(embedding_docs)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238ms/step


array([[[ 0.02511762, -0.01511731,  0.04658642, ..., -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642, ..., -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642, ..., -0.01283702,
          0.00598728,  0.0392575 ],
        ...,
        [-0.02112184, -0.01640648, -0.03515881, ..., -0.00531634,
          0.00903229, -0.01476438],
        [-0.0117575 ,  0.03966178, -0.03006696, ..., -0.00355604,
          0.01096009,  0.01946599],
        [-0.04990308,  0.02696748, -0.00717743, ..., -0.03220385,
          0.00622045,  0.04724799]],

       [[ 0.02511762, -0.01511731,  0.04658642, ..., -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642, ..., -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642, ..., -0.01283702,
          0.00598728,  0.0392575 ],
        ...,
        [-0.04185014, -0.02611398,  0.0402552 , ...,  

In [10]:
# Embed only the last sentence (row index 8); a leading batch axis is added so
# the input has shape (1, sent_length) as `predict` expects a batch.
model.predict(embedding_docs[8][np.newaxis, :])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step


array([[[ 0.02511762, -0.01511731,  0.04658642,  0.04441735,
         -0.02667468,  0.02506334, -0.0104434 , -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642,  0.04441735,
         -0.02667468,  0.02506334, -0.0104434 , -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642,  0.04441735,
         -0.02667468,  0.02506334, -0.0104434 , -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642,  0.04441735,
         -0.02667468,  0.02506334, -0.0104434 , -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642,  0.04441735,
         -0.02667468,  0.02506334, -0.0104434 , -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642,  0.04441735,
         -0.02667468,  0.02506334, -0.0104434 , -0.01283702,
          0.00598728,  0.0392575 ],
        [ 0.02511762, -0.01511731,  0.04658642,  0.0