#RNN using sentences

In [1]:
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense

In [2]:
sentences = [
    "The weather is amazing today",
    "I hate waiting in long lines",
    "Our team won the championship",
    "That was a terrible experience",
    "The product quality is very poor",
    "I dislike loud noises",
    "This is a wonderful opportunity",
    "We hiked through a beautiful forest",
    "Helping others brings happiness",
    "Stock prices might fall next week"
]

In [3]:
labels = [1, 0, 1, 0, 0, 0, 1, 1, 1, 0]  # 1 = Positive, 0 = Negative sentiment

In [4]:
# Tokenization
tokenizer = Tokenizer(oov_token="oov")
tokenizer.fit_on_texts(sentences)
vocab = tokenizer.word_index
print("Vocabulary:", vocab)

Vocabulary: {'oov': 1, 'the': 2, 'is': 3, 'a': 4, 'i': 5, 'weather': 6, 'amazing': 7, 'today': 8, 'hate': 9, 'waiting': 10, 'in': 11, 'long': 12, 'lines': 13, 'our': 14, 'team': 15, 'won': 16, 'championship': 17, 'that': 18, 'was': 19, 'terrible': 20, 'experience': 21, 'product': 22, 'quality': 23, 'very': 24, 'poor': 25, 'dislike': 26, 'loud': 27, 'noises': 28, 'this': 29, 'wonderful': 30, 'opportunity': 31, 'we': 32, 'hiked': 33, 'through': 34, 'beautiful': 35, 'forest': 36, 'helping': 37, 'others': 38, 'brings': 39, 'happiness': 40, 'stock': 41, 'prices': 42, 'might': 43, 'fall': 44, 'next': 45, 'week': 46}


In [5]:
# Convert sentences to sequences and pad them
sequences = tokenizer.texts_to_sequences(sentences)
padded_sequences = pad_sequences(sequences, maxlen=6, padding='post', truncating='post')
print("Padded Sequences:\n", padded_sequences)

Padded Sequences:
 [[ 2  6  3  7  8  0]
 [ 5  9 10 11 12 13]
 [14 15 16  2 17  0]
 [18 19  4 20 21  0]
 [ 2 22 23  3 24 25]
 [ 5 26 27 28  0  0]
 [29  3  4 30 31  0]
 [32 33 34  4 35 36]
 [37 38 39 40  0  0]
 [41 42 43 44 45 46]]


In [6]:
# Convert labels to NumPy array
labels = np.array(labels)

In [7]:
# Define RNN model
model = Sequential()
model.add(SimpleRNN(10, input_shape=(padded_sequences.shape[1], 1), activation='relu'))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(**kwargs)


In [8]:
# Train the model
model.fit(padded_sequences, labels, epochs=10, batch_size=2)


Epoch 1/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.5000 - loss: 0.9508
Epoch 2/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3056 - loss: 1.0129     
Epoch 3/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2653 - loss: 0.9042     
Epoch 4/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4458 - loss: 0.6673 
Epoch 5/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6111 - loss: 0.6021 
Epoch 6/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5833 - loss: 0.6457 
Epoch 7/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5958 - loss: 0.7092 
Epoch 8/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7889 - loss: 0.6265 
Epoch 9/10
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x7d48f6353490>

In [9]:
# Test on a new sentence
test_sentence = ["The scenery was breathtaking"]
test_sequence = tokenizer.texts_to_sequences(test_sentence)
test_padded = pad_sequences(test_sequence, maxlen=6, padding='post', truncating='post')
print("Test Padded Sequence:", test_padded)


Test Padded Sequence: [[ 2  1 19  1  0  0]]


In [10]:
# Prediction
prediction = model.predict(test_padded)
print("Prediction (0 = Negative, 1 = Positive):", prediction)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
Prediction (0 = Negative, 1 = Positive): [[0.5195246]]


In [11]:
# Display vocabulary
print("Final Vocabulary:", tokenizer.word_index)

Final Vocabulary: {'oov': 1, 'the': 2, 'is': 3, 'a': 4, 'i': 5, 'weather': 6, 'amazing': 7, 'today': 8, 'hate': 9, 'waiting': 10, 'in': 11, 'long': 12, 'lines': 13, 'our': 14, 'team': 15, 'won': 16, 'championship': 17, 'that': 18, 'was': 19, 'terrible': 20, 'experience': 21, 'product': 22, 'quality': 23, 'very': 24, 'poor': 25, 'dislike': 26, 'loud': 27, 'noises': 28, 'this': 29, 'wonderful': 30, 'opportunity': 31, 'we': 32, 'hiked': 33, 'through': 34, 'beautiful': 35, 'forest': 36, 'helping': 37, 'others': 38, 'brings': 39, 'happiness': 40, 'stock': 41, 'prices': 42, 'might': 43, 'fall': 44, 'next': 45, 'week': 46}
