In [20]:
import numpy as np
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [21]:
texts = [
    "I love this movie",
    "This film was terrible",
    "I enjoyed the film",
    "The movie was boring",
    "What a fantastic film",
    "I did not like the movie",
    "Amazing storyline and acting",
    "Waste of time",
    "Great direction and plot",
    "The worst movie I have seen"
]

labels = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]

In [22]:
tokenizer = Tokenizer(num_words=1000)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

In [23]:
sequences

[[1, 8, 5, 2],
 [5, 4, 6, 9],
 [1, 10, 3, 4],
 [3, 2, 6, 11],
 [12, 13, 14, 4],
 [1, 15, 16, 17, 3, 2],
 [18, 19, 7, 20],
 [21, 22, 23],
 [24, 25, 7, 26],
 [3, 27, 2, 1, 28, 29]]

In [24]:
maxlen = 10
x_data = pad_sequences(sequences, maxlen=maxlen)

In [25]:
x_data

array([[ 0,  0,  0,  0,  0,  0,  1,  8,  5,  2],
       [ 0,  0,  0,  0,  0,  0,  5,  4,  6,  9],
       [ 0,  0,  0,  0,  0,  0,  1, 10,  3,  4],
       [ 0,  0,  0,  0,  0,  0,  3,  2,  6, 11],
       [ 0,  0,  0,  0,  0,  0, 12, 13, 14,  4],
       [ 0,  0,  0,  0,  1, 15, 16, 17,  3,  2],
       [ 0,  0,  0,  0,  0,  0, 18, 19,  7, 20],
       [ 0,  0,  0,  0,  0,  0,  0, 21, 22, 23],
       [ 0,  0,  0,  0,  0,  0, 24, 25,  7, 26],
       [ 0,  0,  0,  0,  3, 27,  2,  1, 28, 29]], dtype=int32)

In [26]:
y_data = np.array(labels)

In [27]:
train_size = int(len(x_data) * 0.8)
x_train, x_test = x_data[:train_size], x_data[train_size:]
y_train, y_test = y_data[:train_size], y_data[train_size:]

In [28]:
model = Sequential([
    Embedding(input_dim=1000, output_dim=64),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

In [29]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [30]:
model.fit(x_train, y_train, epochs=10, batch_size=2, validation_split=0.2)

Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - accuracy: 0.1458 - loss: 0.6999 - val_accuracy: 0.5000 - val_loss: 0.6929
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5000 - loss: 0.6885 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3125 - loss: 0.6905 - val_accuracy: 0.5000 - val_loss: 0.6931
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.6792 - val_accuracy: 0.5000 - val_loss: 0.6935
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.6757 - val_accuracy: 0.5000 - val_loss: 0.6938
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 1.0000 - loss: 0.6705 - val_accuracy: 0.5000 - val_loss: 0.6943
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0

<keras.src.callbacks.history.History at 0x2805af8f0>

In [31]:
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 89ms/step - accuracy: 0.5000 - loss: 0.6955
Test accuracy: 0.5000


In [32]:
predictions = model.predict(x_test)
predicted_labels = (predictions > 0.5).astype(int)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step


In [33]:
for i in range(len(x_test)):
    print(f"Sentence: {tokenizer.sequences_to_texts([x_test[i]])[0]}")
    print(f"True Label: {y_test[i]}, Predicted Label: {predicted_labels[i][0]}")
    print("---")

Sentence: great direction and plot
True Label: 1, Predicted Label: 1
---
Sentence: the worst movie i have seen
True Label: 0, Predicted Label: 1
---
