In [1]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Example text data.
# Each review sits next to its sentiment label (1 = positive, 0 = negative)
# so a review and its label cannot drift out of alignment when the list is
# edited; the parallel `texts` / `labels` containers are derived from it.
_labelled_reviews = [
    ("The movie was absolutely brilliant, loved every minute of it!", 1),
    ("The plot was engaging and the characters were well-developed.", 1),
    ("I couldn't stand the terrible dialogue and wooden acting.", 0),
    ("The cinematography was stunning, but the story fell flat for me.", 0),
    ("Overall, I found the film to be disappointing and uninspired.", 0),
    ("I was pleasantly surprised by how much I enjoyed the movie.", 1),
    ("The acting was superb, especially from the lead actor.", 1),
    ("The special effects were impressive, but the pacing was off.", 0),
    ("This movie is a must-watch for any film enthusiast.", 1),
    ("I found it hard to connect with any of the characters.", 0),
    ("The movie kept me on the edge of my seat from start to finish.", 1),
    ("I couldn't stop laughing at the hilarious jokes throughout.", 1),
    ("The soundtrack added so much depth to the emotional scenes.", 1),
    ("The ending was predictable and left me feeling unsatisfied.", 0),
    ("I was deeply moved by the powerful performances of the cast.", 1),
]

# Raw review strings, in the same order as `labels`.
texts = [review for review, _ in _labelled_reviews]

# Binary sentiment targets as a 1-D integer array, one entry per review.
labels = np.array([sentiment for _, sentiment in _labelled_reviews])


# Text preprocessing: turn the raw reviews into fixed-length integer sequences.
# These two limits are reused further down (embedding size / padding length).
max_words = 1000  # vocabulary cap handed to the tokenizer (num_words)
maxlen = 20  # every sequence is padded or truncated to this many tokens

# Learn the vocabulary from the training reviews and report its size.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

# Encode each review as a list of word indices, then pad all of them to a
# common length so they stack into a single 2-D array.
sequences = tokenizer.texts_to_sequences(texts)
data = pad_sequences(sequences, maxlen=maxlen)
print('Shape of data tensor:', data.shape)


# Model definition: embedding -> single LSTM layer -> sigmoid output unit.
embedding_dim = 100  # size of each learned word vector
lstm_units = 64  # hidden units in the LSTM layer

# Layers are passed to the constructor in stacking order.
model = Sequential([
    Embedding(max_words, embedding_dim, input_length=maxlen),
    LSTM(lstm_units),
    Dense(1, activation='sigmoid'),  # one probability per input sequence
])

# Binary cross-entropy matches the 0/1 labels and the sigmoid output.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

# Train on the padded reviews; 20% of the samples are held out for validation.
model.fit(
    x=data,
    y=labels,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)

# Example new sentences for prediction
new_texts = ['I loved the movie, it was amazing!',
             'The acting was dreadful, I regret watching it.']

# Encode the new sentences with the tokenizer fitted on the training reviews
# and pad them to the same length the model was trained on.
# NOTE(review): the tokenizer is created without an oov_token, so words it has
# not seen during fitting are presumably dropped from these sequences — confirm
# whether that is acceptable for the sentences being scored.
new_sequences = tokenizer.texts_to_sequences(new_texts)
new_data = pad_sequences(new_sequences, maxlen=maxlen)

# Predicting sentiment for new sentences
predictions = model.predict(new_data)

# The model ends in a single sigmoid unit, so `predictions` holds one column of
# probabilities. Take that column once so each sentence is paired with a scalar
# value: the threshold test then compares a scalar instead of relying on the
# truth value of a one-element array, and it uses exactly the number that is
# printed.
for text, probability in zip(new_texts, predictions[:, 0]):
    sentiment = 'Positive' if probability > 0.5 else 'Negative'
    print(f'Sentence: "{text}"')
    print(f'Predicted sentiment: {sentiment} (Probability: {probability:.4f})')
    print()


Found 94 unique tokens.
Shape of data tensor: (15, 20)
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 20, 100)           100000    
                                                                 
 lstm (LSTM)                 (None, 64)                42240     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 142305 (555.88 KB)
Trainable params: 142305 (555.88 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Sentence: "I loved the movie, it was amazing!"
Predicted sentiment: Positive (Probability: 0.5873)

Sentence: "The