In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Embedding
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import one_hot

In [6]:
# Toy sentiment dataset: short restaurant reviews grouped by label so the
# label array can be derived from the grouping instead of typed by hand.
positive_reviews = [
    'nice food',
    'too good',
    'amazing restaurant',
    'just loved it',
    'will go again',
]
negative_reviews = [
    'horrible food',
    'never go there',
    'putangina panget lasa',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Positive reviews come first, followed by the negative ones.
reviews = positive_reviews + negative_reviews

# 1 = positive, 0 = negative; entry i is the label of reviews[i].
sentiment = np.array([1] * len(positive_reviews) + [0] * len(negative_reviews))

In [10]:
# Encode each word of the phrase as an integer id; the second argument (3000)
# bounds the range the ids are drawn from.
one_hot("amazing restaurant", 3000)

[717, 2977]

In [12]:
# Same phrase with the size argument reduced to 30, so the ids fall in a much
# smaller range.
one_hot("amazing restaurant", 30)

[29, 27]

In [14]:
vocab_size = 30

# Turn every review into a list of integer word ids (one id per word).
# NOTE(review): one_hot hashes words into a fixed range, so distinct words can
# end up sharing an id when vocab_size is this small, and the ids are not
# guaranteed to be the same after a kernel restart — confirm this is acceptable.
encoded_reviews = list(map(lambda text: one_hot(text, vocab_size), reviews))
encoded_reviews

[[26, 4],
 [25, 11],
 [29, 27],
 [12, 8, 17],
 [9, 8, 11],
 [20, 4],
 [14, 8, 20],
 [2, 11, 2],
 [3, 23],
 [3, 3],
 [18, 19]]

In [15]:
# Pad to the longest encoded review rather than a hard-coded length, so a
# longer review added to the dataset later is not silently truncated by
# pad_sequences. With the current reviews this evaluates to 3.
max_length = max(len(encoded) for encoded in encoded_reviews)

# padding='post' appends zeros after the word ids of the shorter reviews so
# every row has exactly max_length entries.
padded_reviews = pad_sequences(encoded_reviews, maxlen=max_length, padding='post')
padded_reviews

array([[26,  4,  0],
       [25, 11,  0],
       [29, 27,  0],
       [12,  8, 17],
       [ 9,  8, 11],
       [20,  4,  0],
       [14,  8, 20],
       [ 2, 11,  2],
       [ 3, 23,  0],
       [ 3,  3,  0],
       [18, 19,  0]], dtype=int32)

In [20]:
embedded_vector_size = 4

# Seed the Python, NumPy and TensorFlow random generators before any layer
# weights are created, so the initial weights (and the shuffling performed
# later by model.fit) repeat when this cell and the training cell are re-run.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

model = Sequential()
# Learn one embedded_vector_size-dimensional vector per word id; each review is
# a row of max_length ids.
model.add(Embedding(vocab_size, embedded_vector_size, input_length=max_length, name="embedding"))
# Collapse the (max_length, embedded_vector_size) embedding output into a
# single flat vector per review.
model.add(Flatten())
# Single sigmoid unit producing the positive-sentiment score.
model.add(Dense(1, activation='sigmoid'))

In [21]:
# Features are the padded id sequences; targets are the 0/1 sentiment labels.
X, y = padded_reviews, sentiment

In [22]:
# Configure training: binary cross-entropy loss minimised with Adam, with
# accuracy reported as an additional metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [23]:
# Print the layer-by-layer summary: output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 3, 4)              120       
                                                                 
 flatten_1 (Flatten)         (None, 12)                0         
                                                                 
 dense_1 (Dense)             (None, 1)                 13        
                                                                 
Total params: 133
Trainable params: 133
Non-trainable params: 0
_________________________________________________________________


In [24]:
# Train on all of X and y for 50 epochs; verbose=0 keeps per-epoch progress
# out of the cell output.
model.fit(X, y, epochs=50, verbose=0)

<keras.callbacks.History at 0x790238cb6230>

In [25]:
# Evaluate on X and y — the same arrays that were passed to model.fit — so the
# reported value is training-set accuracy, not a held-out estimate.
loss, accuracy = model.evaluate(X, y)
accuracy



0.8181818127632141

In [30]:
# Pull the learned lookup table out of the layer named "embedding";
# get_weights() returns a list and the embedding matrix is its first entry.
weights = (
    model
    .get_layer(name="embedding")
    .get_weights()[0]
)
weights

array([[ 0.06649303,  0.02315529, -0.02015208, -0.01888686],
       [-0.04218109, -0.04471217, -0.03650121,  0.04504972],
       [ 0.05899217,  0.0635724 , -0.07502172, -0.0411803 ],
       [ 0.09611884,  0.08986293, -0.06288218, -0.0596535 ],
       [ 0.04987312, -0.00870864,  0.02217744, -0.02585319],
       [ 0.04151842,  0.03624126,  0.02228278,  0.0122214 ],
       [ 0.00896414, -0.01220701,  0.03910213, -0.04683336],
       [-0.04464271, -0.00132652, -0.0123353 , -0.00576012],
       [ 0.0608191 ,  0.00634568,  0.08648341, -0.09767055],
       [-0.00264543, -0.00728384,  0.00633743,  0.0711911 ],
       [ 0.01089638,  0.02334091,  0.00641201, -0.01123213],
       [-0.06218822, -0.00387696,  0.09492856,  0.0827812 ],
       [-0.0782242 , -0.0489798 ,  0.06923579,  0.07094882],
       [ 0.00564749,  0.03838233,  0.04075715, -0.03790627],
       [ 0.09341422,  0.0180643 , -0.03229119, -0.03883103],
       [ 0.03446886,  0.00715562,  0.04122702, -0.00819367],
       [-0.02907385,  0.

In [31]:
# Number of rows in the embedding matrix.
weights.shape[0]

30

In [35]:
# Look the id up from the encoded data instead of hard-coding it: the ids
# produced by one_hot are only valid for the session that produced them.
weights[encoded_reviews[0][0]]  # 'nice' is the first word of reviews[0] ('nice food')

array([-0.02062035, -0.03263081,  0.04078335,  0.09049056], dtype=float32)

In [34]:
# Look the id up from the encoded data instead of hard-coding it: the ids
# produced by one_hot are only valid for the session that produced them.
weights[encoded_reviews[0][1]]  # 'food' is the second word of reviews[0] ('nice food')

array([ 0.04987312, -0.00870864,  0.02217744, -0.02585319], dtype=float32)