In [1]:
import numpy as np
from tensorflow import keras
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Toy corpus: the first five reviews are positive, the last five negative.
reviews = [
    # positive
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
    # negative
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Labels aligned with `reviews` by position: 1 = positive, 0 = negative.
sentiment = np.array([1] * 5 + [0] * 5)

In [3]:
# Maps every word to an integer index below `n` using hashing. The index is NOT
# guaranteed to be unique: two different words can land on the same integer.
one_hot('amazing restaurant', n=30)

[24, 1]

In [4]:
vocab_size = 30  # size of the hashing space passed to one_hot

# Turn each review into a list of word indices, one index per word.
# Hash collisions are possible, so distinct words may share an index.
encoded_reviews = [one_hot(review, n=vocab_size) for review in reviews]
encoded_reviews

[[29, 23],
 [24, 1],
 [18, 8],
 [20, 22, 11],
 [6, 6, 20],
 [4, 23],
 [14, 6, 6],
 [9, 18],
 [9, 16],
 [28, 10]]

In [5]:
max_length = 3  # every review is brought to this many word indices

# padding='post' appends zeros at the end of the shorter sequences.
padded_reviews = pad_sequences(
    encoded_reviews,
    maxlen=max_length,
    padding='post',
)
padded_reviews

array([[29, 23,  0],
       [24,  1,  0],
       [18,  8,  0],
       [20, 22, 11],
       [ 6,  6, 20],
       [ 4, 23,  0],
       [14,  6,  6],
       [ 9, 18,  0],
       [ 9, 16,  0],
       [28, 10,  0]])

In [6]:
embeded_vector_size = 4  # number of learned features (weights) per word index

# Binary sentiment classifier: embedding -> flatten -> single sigmoid unit.
#
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument. Newer Keras releases deprecate and ignore
# `input_length`, which would leave the model unbuilt (no output shapes or
# parameter counts) when `model.summary()` is called before training.
model = keras.Sequential()
model.add(keras.Input(shape=(max_length,)))
# One trainable row of `embeded_vector_size` values per index in [0, vocab_size).
model.add(keras.layers.Embedding(vocab_size, embeded_vector_size, name='embedding'))
# Concatenate the per-word vectors of a review into one flat feature vector.
model.add(keras.layers.Flatten())
# Single sigmoid output: probability that the review is positive (label 1).
model.add(keras.layers.Dense(1, activation='sigmoid'))

In [7]:
# Features are the padded index sequences; targets are the 0/1 sentiment labels.
x, y = padded_reviews, sentiment

In [8]:
# Binary cross-entropy matches the single sigmoid output; accuracy is tracked
# as an additional metric during training and evaluation.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 3, 4)              120       
_________________________________________________________________
flatten (Flatten)            (None, 12)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 13        
Total params: 133
Trainable params: 133
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train for 50 epochs. `verbose` only controls how progress is displayed
# (2 prints one summary line per epoch); it has no effect on training itself.
model.fit(
    x,
    y,
    epochs=50,
    verbose=2,
)

Epoch 1/50
1/1 - 1s - loss: 0.6963 - accuracy: 0.3000
Epoch 2/50
1/1 - 0s - loss: 0.6956 - accuracy: 0.3000
Epoch 3/50
1/1 - 0s - loss: 0.6948 - accuracy: 0.3000
Epoch 4/50
1/1 - 0s - loss: 0.6940 - accuracy: 0.3000
Epoch 5/50
1/1 - 0s - loss: 0.6932 - accuracy: 0.4000
Epoch 6/50
1/1 - 0s - loss: 0.6925 - accuracy: 0.4000
Epoch 7/50
1/1 - 0s - loss: 0.6917 - accuracy: 0.5000
Epoch 8/50
1/1 - 0s - loss: 0.6909 - accuracy: 0.5000
Epoch 9/50
1/1 - 0s - loss: 0.6902 - accuracy: 0.6000
Epoch 10/50
1/1 - 0s - loss: 0.6894 - accuracy: 0.6000
Epoch 11/50
1/1 - 0s - loss: 0.6887 - accuracy: 0.6000
Epoch 12/50
1/1 - 0s - loss: 0.6879 - accuracy: 0.6000
Epoch 13/50
1/1 - 0s - loss: 0.6871 - accuracy: 0.7000
Epoch 14/50
1/1 - 0s - loss: 0.6864 - accuracy: 0.7000
Epoch 15/50
1/1 - 0s - loss: 0.6856 - accuracy: 0.7000
Epoch 16/50
1/1 - 0s - loss: 0.6849 - accuracy: 0.7000
Epoch 17/50
1/1 - 0s - loss: 0.6841 - accuracy: 0.7000
Epoch 18/50
1/1 - 0s - loss: 0.6833 - accuracy: 0.8000
Epoch 19/50
1/1 - 0

<tensorflow.python.keras.callbacks.History at 0x18da0237580>

In [10]:
# Returns [loss, accuracy]. Note that x, y are the same samples the model was
# trained on, so this is not an estimate of performance on unseen reviews.
model.evaluate(x=x, y=y)



[0.6525763273239136, 1.0]

In [11]:
# Pull the learned embedding matrix out of the layer named 'embedding';
# the first entry of get_weights() holds one row per word index.
weights = model.get_layer(name='embedding').get_weights()[0]
weights

array([[ 8.7234497e-02,  1.9657228e-02,  3.0412266e-02,  9.9231310e-02],
       [-9.8056264e-02, -9.8922728e-03,  4.3531906e-02,  9.9695502e-03],
       [ 1.9075643e-02, -7.9663768e-03,  2.8835092e-02, -4.5172323e-02],
       [-2.6540041e-02, -3.1085564e-02, -1.1335529e-02,  4.1585628e-02],
       [ 4.7713615e-02, -8.7312600e-03, -9.9595055e-02,  3.2745399e-02],
       [-2.4616873e-02,  3.8121168e-02, -2.0763446e-02, -3.7323311e-04],
       [ 4.8100825e-02,  3.2865457e-02,  9.6880600e-02, -1.0563501e-02],
       [ 3.9637122e-02,  8.1351772e-03, -4.5980979e-02,  1.6229775e-02],
       [-3.1740135e-03, -8.6262137e-02,  1.8181495e-02,  9.7642332e-02],
       [ 8.7391369e-02, -5.5641998e-02, -7.2022812e-03,  8.9069987e-03],
       [ 7.5683899e-02,  5.8969043e-02, -4.5979012e-02, -3.5830487e-02],
       [-5.7851806e-02, -5.0243124e-02, -2.4915883e-02, -7.2731845e-02],
       [ 4.4425577e-04,  4.7789764e-02, -4.8449840e-02,  4.7400344e-02],
       [-2.5510287e-02, -2.0737529e-02, -4.9043447e

In [12]:
# Number of rows in the embedding matrix (one per possible word index).
weights.shape[0]

30

In [13]:
# Embedding of 'nice'. one_hot returns a list of indices, so indexing with it
# yields a 2-D array with one row per word in the input string.
weights[one_hot('nice', n=vocab_size)]

array([[-0.05268809,  0.07265554,  0.02862029, -0.0276777 ]],
      dtype=float32)

In [14]:
# Embedding of 'good'. one_hot returns a list of indices, so indexing with it
# yields a 2-D array with one row per word in the input string.
weights[one_hot('good', n=vocab_size)]

array([[-0.00317401, -0.08626214,  0.0181815 ,  0.09764233]],
      dtype=float32)