In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.layers import Embedding

In [2]:
reviews = [
    'nice food',
    'amazing restaurent',
    'too good',
    'just loved it!',
    'will go again',
    'horrible food',
    'never go there',
    'poor service',
    'poor qualitiy',
    'needs improvement'
]

sentiment = np.array([1,1,1,1,1,0,0,0,0,0])

In [3]:
one_hot('amazing restaurent',30)


[6, 18]

In [4]:
vocab_size = 30
encoded_reviews = [one_hot(d, vocab_size) for d in reviews]
encoded_reviews #Here we are assigning words above a number. for example: nice word assigned number 29 and amazing assigned 6 as number.

[[21, 29],
 [6, 18],
 [2, 1],
 [6, 25, 21],
 [9, 6, 26],
 [23, 29],
 [13, 6, 9],
 [11, 1],
 [11, 8],
 [1, 28]]

In [5]:
max_length = 3 #in this we see that some words are of length two and some of them are of length three.
# So we assign the zeros to the smaller length words like nice food. and at length 3 0 will be assigned in vector
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
padded_reviews

array([[21, 29,  0],
       [ 6, 18,  0],
       [ 2,  1,  0],
       [ 6, 25, 21],
       [ 9,  6, 26],
       [23, 29,  0],
       [13,  6,  9],
       [11,  1,  0],
       [11,  8,  0],
       [ 1, 28,  0]])

In [11]:
embed_vector_size = 5

model = Sequential()
model.add(Embedding(input_dim =vocab_size,output_dim = embed_vector_size, input_length= max_length, name='embedding')) #name it to use again in other cells or next cells
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))




In [12]:
X = padded_reviews
y = sentiment

In [13]:
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = ['accuracy']
)

model.summary()

In [14]:
model.fit(X, y, epochs=50, verbose=0) 

<keras.src.callbacks.history.History at 0x2a149f1cda0>

In [15]:
loss, accuracy = model.evaluate(X, y)
accuracy

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step - accuracy: 1.0000 - loss: 0.6111


1.0

In [16]:
weights = model.get_layer('embedding').get_weights()[0]
len(weights) #same weight that will be on vocab size

30

In [17]:
#lets lock at 'nice' & 'amazing' words. Those have numbers 29 & 3.
weights[29]

array([ 0.01168329, -0.03632068,  0.00341534,  0.03453458,  0.04534462],
      dtype=float32)

In [18]:
weights[3] 

array([-0.04671168,  0.00574858,  0.01876718, -0.02050376, -0.0069702 ],
      dtype=float32)