In [14]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import numpy as np

In [15]:
# Toy corpus: each review is paired with its label (1 = positive, 0 = negative).
labelled_reviews = [
    ("This movie is fantastic!", 1),
    ("I didn't like the acting.", 0),
    ("The plot was confusing.", 0),
    ("Amazing cinematography!", 1),
    ("The worst movie I've ever seen.", 0),
    ("What the hell is this movie even?!.", 0),
]

# Unzip the pairs into the two parallel lists used by the following cells.
texts = [review for review, _ in labelled_reviews]
labels = [label for _, label in labelled_reviews]

In [16]:
# max_words: vocabulary cap handed to the Tokenizer (num_words).
# max_len:   length handed to pad_sequences (maxlen).
max_words, max_len = 1000, 100

# Build the word index from the training texts, then encode those texts
# as integer sequences and pad them to max_len.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences=sequences, maxlen=max_len)

In [24]:
# Split the data with scikit-learn's model_selection: 20% is held out for
# testing, and random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    labels,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Define the model with Keras.
#
# Seed Python, NumPy and TensorFlow before any layer is created so that a
# Restart & Run All gives repeatable weights and training results (as far as
# the backend ops are deterministic). Without this, the accuracy printed
# further down changes from run to run.
tf.keras.utils.set_random_seed(42)

model = tf.keras.Sequential([
    # Look up a trainable 16-dimensional vector for every token id below max_words.
    tf.keras.layers.Embedding(input_dim=max_words, output_dim=16),
    # Flatten the per-token vectors into a single feature vector per sample.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(8, activation='relu'),
    # One sigmoid unit: output in (0, 1), trained against the 0/1 labels.
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [19]:
# Compile the model: Adam optimizer, binary cross-entropy loss, and
# accuracy reported as the metric.
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

In [20]:
# Train the model. y_train comes back from train_test_split as a plain list,
# so it is converted to a NumPy array before being handed to Keras.
model.fit(
    x=X_train,
    y=np.asarray(y_train),
    batch_size=5,
    epochs=13,
)

Epoch 1/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.7500 - loss: 0.6791
Epoch 2/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.7500 - loss: 0.6567
Epoch 3/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.7500 - loss: 0.6385
Epoch 4/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.7500 - loss: 0.6213
Epoch 5/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.7500 - loss: 0.6051
Epoch 6/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.7500 - loss: 0.5901
Epoch 7/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.7500 - loss: 0.5767
Epoch 8/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.7500 - loss: 0.5652
Epoch 9/13
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m

<keras.src.callbacks.history.History at 0x1d693c9dbb0>

In [25]:
# Evaluate on the held-out split; with the compiled metrics, evaluate()
# returns the loss followed by the accuracy.
loss, accuracy = model.evaluate(x=X_test, y=np.asarray(y_test))
print(f'Test Accuracy {accuracy}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 155ms/step - accuracy: 0.5000 - loss: 0.8750
Test Accuracy 0.5


In [27]:
# Sentences used to spot-check the trained model.
test_texts = [
    "This movie is amazing!",
    "The acting was terrible.",
    "I loved the plot.",
    "It's a great film.",
    "I hated every minute of it.",
]

In [28]:
# Encode the test sentences with the tokenizer that was fitted on the
# training texts, then pad them to the same max_len as the training data.
test_sequences = tokenizer.texts_to_sequences(test_texts)
padded_test_sequences = pad_sequences(
    sequences=test_sequences,
    maxlen=max_len,
)

In [29]:
# Run the trained model on the padded test sentences to get the raw
# sigmoid outputs (thresholded into labels in the next cell).
predictions = model.predict(x=padded_test_sequences)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 76ms/step


In [30]:
# Convert the raw model outputs into hard 0/1 labels (1 when output > 0.5).
#
# The whole array is thresholded in one vectorised step instead of looping
# over rows and relying on the truth value of one-element arrays.
# ravel() flattens the per-sample rows (presumably shape (n_samples, 1), given
# the single-unit output layer) and tolist() keeps the result a plain list
# of Python ints, as before.
binary_predictions = (np.asarray(predictions) > 0.5).astype(int).ravel().tolist()

In [33]:
# Show each test sentence next to its predicted sentiment.
for text, predicted_label in zip(test_texts, binary_predictions):
    if predicted_label == 1:
        sentiment = "positive"
    else:
        sentiment = "negative"
    print(f"Text: '{text}', Predicted Sentiment: {sentiment}")

Text: 'This movie is amazing!', Predicted Sentiment: negative
Text: 'The acting was terrible.', Predicted Sentiment: negative
Text: 'I loved the plot.', Predicted Sentiment: negative
Text: 'It's a great film.', Predicted Sentiment: negative
Text: 'I hated every minute of it.', Predicted Sentiment: negative


In [None]:
# Takeaway: every test sentence was predicted negative, so the training set needs more examples with a more diverse vocabulary.