**Integer Encoding**

In [None]:
# Toy corpus used by every cell below: three short sentences.
# NOTE: "intresting" is misspelled, but it is kept as-is because the recorded
# vocabulary and outputs in this notebook were produced from this exact text.
texts = [
    "Generative AI is intresting",
    "AI is transforming the world",
    "I want to know about AI more.",
]

In [None]:
from tensorflow.keras.preprocessing.text import Tokenizer

# Build the vocabulary: fitting on the corpus populates `tokenizer.word_index`,
# the word -> integer mapping used by the following cells.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)

In [None]:
# Show the learned word -> index mapping. As the output below shows, words are
# lower-cased, indices start at 1 (0 is never assigned to a word), and the
# words that occur most often in `texts` ('ai', 'is') get the smallest indices.
print(tokenizer.word_index)

{'ai': 1, 'is': 2, 'generative': 3, 'intresting': 4, 'transforming': 5, 'the': 6, 'world': 7, 'i': 8, 'want': 9, 'to': 10, 'know': 11, 'about': 12, 'more': 13}


In [None]:
# Integer-encode the corpus: each sentence becomes a list of word indices.
# The lists have different lengths (4, 5 and 7 tokens) until they are padded.
sequences = tokenizer.texts_to_sequences(texts)
sequences

[[3, 1, 2, 4], [1, 2, 5, 6, 7], [8, 9, 10, 11, 12, 1, 13]]

In [None]:
# Import from `tensorflow.keras`, the same namespace the Tokenizer and the model
# in this notebook come from, rather than mixing in the standalone `keras` package.
from tensorflow.keras.utils import pad_sequences

# Left-pad ('pre') each sequence with zeros up to the length of the longest
# sequence, so all rows can be stacked into one 2-D integer array.
padded_sequences = pad_sequences(sequences, padding='pre')
print(padded_sequences)

[[ 0  0  0  3  1  2  4]
 [ 0  0  1  2  5  6  7]
 [ 8  9 10 11 12  1 13]]


In [None]:
# Import from `tensorflow.keras`, the same namespace the Tokenizer and the model
# in this notebook come from, rather than mixing in the standalone `keras` package.
from tensorflow.keras.utils import to_categorical

# One-hot encode every token index. `num_classes` is pinned to the vocabulary
# size + 1 (index 0 is the padding value) instead of being inferred from the
# largest index that happens to appear in the array, so the one-hot width stays
# stable even if the rarest word is missing from the input. Here it is still 14.
to_categorical(padded_sequences, num_classes=len(tokenizer.word_index) + 1)

array([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
      

**Keras Embedding**

In [None]:
from tensorflow.keras import models, layers

# Derive the layer sizes from the data instead of hard-coding 14 and 7, so the
# cell keeps working if `texts` changes.
vocab_size = len(tokenizer.word_index) + 1  # +1 because index 0 is the padding value
max_len = padded_sequences.shape[1]         # every padded sequence has this length

model = models.Sequential()

# An explicit Input layer fixes the sequence length and builds the model right
# away, so `summary()` can report real output shapes and parameter counts.
# It replaces Embedding's `input_length` argument, which current Keras releases
# flag as deprecated.
model.add(layers.Input(shape=(max_len,)))

# Each of the `vocab_size` indices is mapped to a trainable 4-dimensional vector.
model.add(layers.Embedding(input_dim=vocab_size, output_dim=4))

model.summary()

In [None]:
# Look up the embedding of every token. The layer is untrained, so the vectors
# are just its random initial weights; identical indices (e.g. the 0 padding)
# map to identical vectors, as the output below shows.
model.compile(optimizer='adam')
word_vectors = model.predict(padded_sequences)
word_vectors

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 135ms/step


array([[[-0.02381177, -0.01065052,  0.04395871,  0.03583583],
        [-0.02381177, -0.01065052,  0.04395871,  0.03583583],
        [-0.02381177, -0.01065052,  0.04395871,  0.03583583],
        [ 0.00783291,  0.03611405,  0.03131589, -0.02834582],
        [-0.01144272, -0.02928251,  0.03406086,  0.0447501 ],
        [ 0.03141769,  0.01571843,  0.0308475 , -0.0345671 ],
        [-0.02443269, -0.0146332 ,  0.0484466 ,  0.00436043]],

       [[-0.02381177, -0.01065052,  0.04395871,  0.03583583],
        [-0.02381177, -0.01065052,  0.04395871,  0.03583583],
        [-0.01144272, -0.02928251,  0.03406086,  0.0447501 ],
        [ 0.03141769,  0.01571843,  0.0308475 , -0.0345671 ],
        [-0.01121341,  0.03790139,  0.01519141, -0.0013973 ],
        [-0.01245753, -0.04483519, -0.03636069, -0.03099337],
        [ 0.02110014,  0.01558856,  0.04102587,  0.03271835]],

       [[-0.03765134, -0.03964809,  0.00427096,  0.04253462],
        [ 0.00750596,  0.02246774, -0.02999684, -0.02422894],
    

In [None]:
# Sentence 1 - "Generative AI is intresting"
# Flatten its 7 x 4 embedding matrix into a single 28-value vector. The first
# three 4-value groups are identical: they are the embedding of padding index 0.
word_vectors[0].flatten()

array([-0.02381177, -0.01065052,  0.04395871,  0.03583583, -0.02381177,
       -0.01065052,  0.04395871,  0.03583583, -0.02381177, -0.01065052,
        0.04395871,  0.03583583,  0.00783291,  0.03611405,  0.03131589,
       -0.02834582, -0.01144272, -0.02928251,  0.03406086,  0.0447501 ,
        0.03141769,  0.01571843,  0.0308475 , -0.0345671 , -0.02443269,
       -0.0146332 ,  0.0484466 ,  0.00436043], dtype=float32)

In [None]:
# Sentence 2 - "AI is transforming the world"
# Flattened 28-value vector; the first two 4-value groups are the embedding of
# padding index 0.
word_vectors[1].flatten()

array([-0.02381177, -0.01065052,  0.04395871,  0.03583583, -0.02381177,
       -0.01065052,  0.04395871,  0.03583583, -0.01144272, -0.02928251,
        0.03406086,  0.0447501 ,  0.03141769,  0.01571843,  0.0308475 ,
       -0.0345671 , -0.01121341,  0.03790139,  0.01519141, -0.0013973 ,
       -0.01245753, -0.04483519, -0.03636069, -0.03099337,  0.02110014,
        0.01558856,  0.04102587,  0.03271835], dtype=float32)

In [None]:
# Sentence 3 - "I want to know about AI more."
# Flattened 28-value vector; this is the longest sentence, so it has no
# leading padding vectors.
word_vectors[2].flatten()

array([-0.03765134, -0.03964809,  0.00427096,  0.04253462,  0.00750596,
        0.02246774, -0.02999684, -0.02422894,  0.01100043,  0.03670153,
        0.02674898, -0.0162141 , -0.02640659, -0.00245818, -0.02988034,
       -0.0208643 ,  0.0314193 , -0.02648121, -0.04834132, -0.00164977,
       -0.01144272, -0.02928251,  0.03406086,  0.0447501 ,  0.01094855,
        0.00687166,  0.03622177,  0.00420123], dtype=float32)