In [22]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Embedding

In [23]:
# Toy corpus: ten short restaurant reviews.
reviews = [
    'nice food',
    'amazing restaurant',
    'too good',
    'just loved it!',
    'will go again',
    'horrible food',
    'never go there',
    'poor service',
    'poor quality',
    'needs improvement',
]

# Binary labels aligned with `reviews` by position:
# the first five reviews are positive (1), the last five negative (0).
sentiment = np.array([1] * 5 + [0] * 5)


In [24]:
# Despite its name, Keras' one_hot() does not build a one-hot vector: it converts
# each word of the text into an integer index smaller than the given vocabulary
# size (30 here). Per the Keras documentation the index comes from hashing the word.
# NOTE: the integers are NOT guaranteed to be unique per word. With a small
# vocabulary size, different words can collide on the same index (e.g. the review
# 'horrible food' is encoded as [10, 10] when the whole corpus is encoded).
# The output is a list with one integer per word of the review.
print(one_hot(reviews[0], 30))

[3, 10]


In [25]:
# Upper bound for the integer word indices produced by one_hot();
# the same value is later used as the input dimension of the embedding layer.
vocab_size = 30

# Encode every review as a list of integer word indices (one per word).
one_hot_reviews = [one_hot(review_text, vocab_size) for review_text in reviews]
print(one_hot_reviews)

[[3, 10], [27, 29], [4, 22], [12, 18, 25], [25, 16, 21], [10, 10], [28, 16, 3], [23, 8], [23, 19], [24, 20]]


In [26]:
# Fixed number of tokens per review fed to the network.
max_length = 3

# Bring every encoded review to exactly `max_length` entries;
# padding='post' appends zeros at the end of shorter reviews.
padded_reviews = pad_sequences(
    one_hot_reviews,
    maxlen=max_length,
    padding='post',
)
print(padded_reviews)

[[ 3 10  0]
 [27 29  0]
 [ 4 22  0]
 [12 18 25]
 [25 16 21]
 [10 10  0]
 [28 16  3]
 [23  8  0]
 [23 19  0]
 [24 20  0]]


In [27]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation and training
# are repeatable between runs of this cell.
tf.keras.utils.set_random_seed(42)

# Dimensionality of the vector learned for each word index.
embedd_vector_size = 4

# Features: padded integer sequences; targets: 0/1 sentiment labels.
x = padded_reviews
y = sentiment

# Model: word indices -> embeddings -> flattened vector -> single sigmoid unit.
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument, which is deprecated in Keras 3. Declaring
# the shape up front also builds the model, so summary() can report output
# shapes and parameter counts.
model = Sequential([
    layers.Input(shape=(max_length,), dtype='int32'),
    Embedding(vocab_size, embedd_vector_size, name='embedding_layer'),
    Flatten(),
    Dense(1, activation='sigmoid'),
])

# binary_crossentropy is the loss for binary classification, where the target
# is either 0 or 1.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that only adds a stray "None" line to the output).
model.summary()



None


In [28]:
# Train for 50 epochs; verbose=0 suppresses the per-epoch progress output.
model.fit(x=x, y=y, epochs=50, verbose=0)

# Score the model on the very same samples it was trained on, so the numbers
# below describe the fit to the training data, not generalisation to new reviews.
loss, accuracy = model.evaluate(x=x, y=y)
print('Loss:', loss)
print('Accuracy:', accuracy)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - accuracy: 1.0000 - loss: 0.6085
Loss: 0.608512282371521
Accuracy: 1.0


In [29]:
# Show the learned embedding matrix: the first (and only) weight array of the
# layer named 'embedding_layer', with one row per word index and
# `embedd_vector_size` columns.
print(
    model
    .get_layer('embedding_layer')
    .get_weights()[0]
)

[[ 0.00723376 -0.0179082  -0.06121919 -0.00423777]
 [-0.01887142  0.01527797  0.03427998  0.04109187]
 [ 0.02719057 -0.00536913  0.01874372 -0.03626066]
 [ 0.07392401  0.00275698 -0.09739076  0.05047316]
 [ 0.06894851 -0.09812812 -0.07060981  0.02531799]
 [ 0.00770133  0.03849853  0.03670016 -0.00639415]
 [ 0.00659152  0.00712286  0.00269165  0.02734456]
 [ 0.03523055 -0.04572887 -0.00648216 -0.03562593]
 [ 0.08310551  0.08428827 -0.00939259 -0.06269748]
 [ 0.00261679  0.02251512 -0.01675215 -0.00274006]
 [-0.04879187  0.07869159 -0.02077291 -0.06202257]
 [-0.00463132  0.03166536  0.04002229 -0.01365586]
 [ 0.07473644 -0.06388447 -0.00627221  0.00332647]
 [-0.03940432 -0.01632028  0.03307937 -0.00604354]
 [ 0.01743302  0.00769366 -0.03756002 -0.01331495]
 [ 0.04474026 -0.01082478 -0.00581642 -0.030571  ]
 [-0.01555849  0.05095023  0.02614028  0.01152074]
 [ 0.02710367  0.0192687  -0.0175606   0.02268017]
 [-0.08960173 -0.04157067  0.01014874  0.09734825]
 [ 0.01541071  0.00835343 -0.02