In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences






In [2]:
# Sample dataset: sentences and their labels (1 for positive, 0 for negative)
sentences = [
    'I love this movie',
    'This film was terrible',
    'Absolutely fantastic experience',
    'Worst movie ever',
    'I really enjoyed it',
    'Not my favorite',
    'Amazing storyline and characters',
    'I hated it',
    'Best film I have seen',
    'Awful and boring',
]
labels = [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]

# Step 1: Tokenize the sentences
vocab_size = 1000  # This limits the number of tokens the tokenizer will keep
max_length = 10  # Maximum length of the sequences

# Initialize the tokenizer and fit on sentences
tokenizer = Tokenizer(num_words=vocab_size, oov_token='<OOV>')
tokenizer.fit_on_texts(sentences)



In [4]:
tokenizer.word_index

{'<OOV>': 1,
 'i': 2,
 'this': 3,
 'movie': 4,
 'film': 5,
 'it': 6,
 'and': 7,
 'love': 8,
 'was': 9,
 'terrible': 10,
 'absolutely': 11,
 'fantastic': 12,
 'experience': 13,
 'worst': 14,
 'ever': 15,
 'really': 16,
 'enjoyed': 17,
 'not': 18,
 'my': 19,
 'favorite': 20,
 'amazing': 21,
 'storyline': 22,
 'characters': 23,
 'hated': 24,
 'best': 25,
 'have': 26,
 'seen': 27,
 'awful': 28,
 'boring': 29}

In [5]:
# Convert sentences to sequences
sequences = tokenizer.texts_to_sequences(sentences)

# Pad the sequences to ensure uniform input length
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')



In [8]:
# Step 2: Create the RNN model
embedding_dim = 16  # Size of the word embeddings
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    SimpleRNN(units=32, return_sequences=False),
    Dense(units=1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 3: Train the model
labels = np.array(labels)
model.fit(padded_sequences, labels, epochs=10, verbose=2)




Epoch 1/10


1/1 - 5s - loss: 0.6924 - accuracy: 0.7000 - 5s/epoch - 5s/step
Epoch 2/10
1/1 - 0s - loss: 0.6819 - accuracy: 0.8000 - 15ms/epoch - 15ms/step
Epoch 3/10
1/1 - 0s - loss: 0.6716 - accuracy: 0.9000 - 18ms/epoch - 18ms/step
Epoch 4/10
1/1 - 0s - loss: 0.6613 - accuracy: 0.9000 - 16ms/epoch - 16ms/step
Epoch 5/10
1/1 - 0s - loss: 0.6507 - accuracy: 0.9000 - 19ms/epoch - 19ms/step
Epoch 6/10
1/1 - 0s - loss: 0.6396 - accuracy: 0.9000 - 19ms/epoch - 19ms/step
Epoch 7/10
1/1 - 0s - loss: 0.6280 - accuracy: 0.9000 - 17ms/epoch - 17ms/step
Epoch 8/10
1/1 - 0s - loss: 0.6154 - accuracy: 0.9000 - 25ms/epoch - 25ms/step
Epoch 9/10
1/1 - 0s - loss: 0.6019 - accuracy: 0.9000 - 17ms/epoch - 17ms/step
Epoch 10/10
1/1 - 0s - loss: 0.5872 - accuracy: 0.9000 - 21ms/epoch - 21ms/step


<keras.src.callbacks.History at 0x21a6784cad0>

In [9]:

# Step 4: Evaluate the model with a new sentence
new_sentences = ['I loved the acting', 'The plot was dull and predictable']
new_sequences = tokenizer.texts_to_sequences(new_sentences)
new_padded = pad_sequences(new_sequences, maxlen=max_length, padding='post')

# Get the model's predictions for new data
predictions = model.predict(new_padded)
for i, sentence in enumerate(new_sentences):
    sentiment = 'Positive' if predictions[i] > 0.5 else 'Negative'
    print(f'Sentence: "{sentence}" - Sentiment: {sentiment}')

Sentence: "I loved the acting" - Sentiment: Positive
Sentence: "The plot was dull and predictable" - Sentiment: Positive
