# Step 1: Import Libraries

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Step 2: Sample Text Data and Labels


This small dataset is used for sentiment analysis, where:
- Positive reviews are labeled as 1.
- Negative reviews are labeled as 0.

In [2]:
# Training sentences; their order must line up with the entries of `labels`.
texts = [
    # Positive examples (label 1)
    "This is a positive review.",
    "I love this product.",
    # Negative examples (label 0)
    "Negative experience, would not recommend.",
    "Terrible customer service.",
]

In [3]:
# One target per entry of `texts`, in the same order (1 = positive, 0 = negative).
labels = [
    1,  # "This is a positive review."
    1,  # "I love this product."
    0,  # "Negative experience, would not recommend."
    0,  # "Terrible customer service."
]

# Step 3: Tokenization and Padding
In this step, you preprocess the text data:
- `Tokenizer` converts text into sequences of integers and builds a vocabulary capped at 1000 words.
- `texts_to_sequences` converts each text into a sequence of integer tokens.
- `pad_sequences` ensures that all sequences have the same length (in this case, 10) by padding shorter sequences with zeros and truncating longer sequences.


In [4]:
# Learn the word -> integer index mapping from the training sentences.
# Words that are not in the learned vocabulary are mapped to the "<OOV>" token.
tokenizer = Tokenizer(
    num_words=1000,
    oov_token="<OOV>",
)
tokenizer.fit_on_texts(texts)

# Encode every sentence as a list of integer ids, then force each list to
# exactly 10 entries: shorter ones get zeros appended ("post" padding) and
# longer ones lose their tail ("post" truncation).
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(
    sequences,
    maxlen=10,
    padding="post",
    truncating="post",
)

# Step 4: Define the RNN Model

Here, you define the RNN model using Keras:
- An Embedding layer is used for word embeddings. It converts integer sequences into dense vectors.
- A SimpleRNN layer with 32 units is added. This layer captures sequential patterns in the data.
- A Dense layer with one output unit and a sigmoid activation function is added for binary sentiment classification.

In [8]:
# Binary sentiment classifier assembled as an ordered stack of layers:
#   1. Embedding  - maps each of up to 1000 token ids to a 16-dimensional vector
#                   (inputs are sequences of length 10, matching the padding step).
#   2. SimpleRNN  - 32 recurrent units that read the embedded sequence.
#   3. Dense      - single sigmoid unit producing a score between 0 and 1.
model = Sequential(
    [
        Embedding(input_dim=1000, output_dim=16, input_length=10),
        SimpleRNN(32),
        Dense(1, activation="sigmoid"),
    ]
)

# Step 5: Compile the Model

You compile the model by specifying the optimizer, loss function (binary cross-entropy), and evaluation metric (accuracy) for training.

In [12]:
# Configure training: Adam optimizer, binary cross-entropy loss (the labels are
# 0/1 and the output is a single sigmoid unit), and accuracy as the reported metric.
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Step 6: Train the Model

- This step trains the model using the preprocessed data (padded_sequences) and labels.
- The model is trained for 10 epochs, adjusting its internal parameters to minimize the loss function.

In [13]:
# Train the compiled model on the padded training sequences for 10 epochs.
#
# `labels` is a plain Python list, so it is converted to a NumPy array here:
# Keras expects array-like targets whose first dimension matches the inputs,
# and mixing an ndarray input with a bare list of targets is not reliably
# accepted across versions.
#
# The returned History object is kept so the per-epoch loss/accuracy can be
# inspected later (history.history); assigning it also keeps its repr out of
# the cell output.
history = model.fit(
    padded_sequences,
    np.array(labels),
    epochs=10,
)

# Step 7: Make Predictions

- We use the trained model to make predictions on new test data (test_texts).
- The code tokenizes and pads the test data similarly to the training data, and then the model predicts the sentiment of each text. 
- Predictions are printed, and "positive" or "negative" labels are assigned based on the model's output.

In [17]:
# Unseen sentences to score with the model.
test_texts = [
    "Great service!",
    "Awful experience.",
]

# Reuse the tokenizer fitted on the training texts and apply the same
# length-10 post-padding / post-truncation used for the training sequences,
# so the inputs have the form the model was built for.
test_sequences = tokenizer.texts_to_sequences(test_texts)
padded_test_sequences = pad_sequences(
    test_sequences,
    maxlen=10,
    padding="post",
    truncating="post",
)

# One model output per test sentence.
predictions = model.predict(padded_test_sequences)



In [21]:
# Walk the test sentences together with the model output for each one and
# report the sentiment: outputs above 0.5 count as positive, the rest as negative.
for text, score in zip(test_texts, predictions):
    if score > 0.5:
        sentiment = "positive"
    else:
        sentiment = "negative"
    print(f"Text: {text} | Predicted sentiment: {sentiment}")

Text: Great service! | Predicted sentiment: negative
Text: Awful experience. | Predicted sentiment: positive
