In [3]:
import numpy as np
from tensorflow.keras.layers import Dense, Embedding, Flatten, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.utils import set_random_seed

In [4]:
# Toy corpus: five positive reviews followed by five negative ones.
# The order matters because the labels are assigned by position.
positive_reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
]
negative_reviews = [
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]
reviews = positive_reviews + negative_reviews

In [5]:
# Binary labels aligned with `reviews` by position: 1 = positive, 0 = negative.
# The first five reviews are positive and the last five are negative.
sentiment = np.array([1] * 5 + [0] * 5)

In [None]:
# Despite its name, one_hot() returns one integer index per word, not 0/1 vectors.
# Each word is hashed into the range 1..n-1 (0 is never assigned), so two different
# words may share an index, and the values may change between kernel sessions.
one_hot('amazing restaurant', n=30)

[10, 23]

In [21]:
# Size of the hashing space; also reused as the Embedding layer's input dimension.
vocab_size = 50

# Turn every review into a list of integer word indices.
# NOTE: one_hot() hashes words, so distinct words can collide on the same index
# (a repeated index inside one review, or two different reviews encoding the same).
encoded_reviews = [one_hot(review_text, n=vocab_size) for review_text in reviews]
print(encoded_reviews)

[[3, 34], [44, 22], [15, 15], [19, 20, 18], [39, 3, 17], [3, 34], [44, 3, 25], [18, 46], [18, 13], [44, 36]]


In [22]:
# Bring every encoded review to the same length so they stack into one 2-D array.
# padding='post' appends the zeros after the real word indices.
max_length = 4
padded_reviews = pad_sequences(
    encoded_reviews,
    maxlen=max_length,
    padding='post',
)
print(padded_reviews)

[[ 3 34  0  0]
 [44 22  0  0]
 [15 15  0  0]
 [19 20 18  0]
 [39  3 17  0]
 [ 3 34  0  0]
 [44  3 25  0]
 [18 46  0  0]
 [18 13  0  0]
 [44 36  0  0]]


In [23]:
embedded_vector_size = 4  # number of dimensions learned for each word index

# Seed Python, NumPy and the backend RNGs so weight initialisation and training
# are repeatable for a given set of encoded inputs.
set_random_seed(42)

# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument, which Keras 3 deprecates and ignores.
# With the shape known up front, the model is built immediately and
# model.summary() can report output shapes and parameter counts before fit().
model = Sequential([
    Input(shape=(max_length,)),
    # Maps each integer index in [0, vocab_size) to a trainable vector; the layer
    # is named so its weights can be fetched later with get_layer('embedding').
    Embedding(vocab_size, embedded_vector_size, name='embedding'),
    # Concatenate the per-word vectors of a review into one flat feature vector.
    Flatten(),
    # Single sigmoid unit: probability that the review is positive.
    Dense(1, activation='sigmoid'),
])



In [24]:
# Features are the padded index sequences; targets are the 0/1 sentiment labels.
X, y = padded_reviews, sentiment

In [25]:
# Binary cross-entropy pairs with the single sigmoid output; accuracy is tracked
# as an additional reported metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# model.summary()  # uncomment to print the layer-by-layer overview

In [26]:
# Train silently (verbose=0) for a fixed number of passes over the ten reviews.
n_epochs = 50
model.fit(X, y, epochs=n_epochs, verbose=0)

<keras.src.callbacks.history.History at 0x1570c3e1550>

In [27]:
# NOTE: X and y are the same arrays the model was fitted on, so this number is
# training accuracy, not an estimate of performance on unseen reviews.
evaluation = model.evaluate(X, y)
loss, accuracy = evaluation
accuracy

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 159ms/step - accuracy: 0.9000 - loss: 0.6542


0.8999999761581421

In [None]:
# The word embeddings are a by-product of the supervised sentiment task:
# they are simply the trained weight matrix of the layer named 'embedding'.
embedding_layer = model.get_layer('embedding')
weights = embedding_layer.get_weights()[0]  # one row per word index
weights  # this matrix could be saved to a file and reloaded later

array([[ 2.71424167e-02, -1.42098907e-02,  1.70980729e-02,
        -2.29597483e-02],
       [-3.37710008e-02, -3.96781676e-02,  4.18131240e-02,
         2.55563743e-02],
       [ 2.69750692e-02,  3.54660489e-02, -4.11847606e-02,
        -3.63595262e-02],
       [-2.94785406e-02,  2.98756752e-02,  4.67245616e-02,
        -2.56545525e-02],
       [ 1.10198036e-02, -4.40261848e-02, -9.64876264e-03,
         4.37207706e-02],
       [ 3.87863182e-02, -1.95443034e-02,  4.15891148e-02,
         1.46674551e-02],
       [-4.75875624e-02, -4.09774855e-03,  1.11440420e-02,
         2.11486705e-02],
       [ 1.17385611e-02, -3.78910303e-02, -8.19353014e-03,
         1.55467428e-02],
       [-1.14446655e-02,  3.64216603e-02, -1.47128701e-02,
         2.00525559e-02],
       [ 1.18175149e-02, -8.04296881e-03, -4.45352308e-02,
         3.48610394e-02],
       [-9.73242521e-03,  1.31715462e-03, -8.93539190e-03,
        -3.60076427e-02],
       [ 3.99339199e-03, -2.78848168e-02, -1.60743818e-02,
      

In [30]:
# Show the learned embedding vector of each word in reviews[0] ('nice food').
# The row indices are read from encoded_reviews instead of being hard-coded:
# one_hot() is hash-based, so the index assigned to a word is not guaranteed to
# be the same in another kernel session, and fixed numbers would then select
# the wrong rows.
for word_index in encoded_reviews[0]:
    print(weights[word_index])

[-0.02947854  0.02987568  0.04672456 -0.02565455]
[-0.02090001 -0.03819225  0.02016148 -0.03170239]
