In [1]:
import pandas as pd
import random

# ----------------------------------------------------------
# 1) Create sample positive and negative sentences
# ----------------------------------------------------------
positive_sentences = [
    "I loved this product",
    "This movie was fantastic",
    "Amazing experience",
    "Highly recommend it",
    "Great service and quality",
    "Excellent work",
    "Very satisfied with this",
    "Absolutely wonderful",
    "I am happy with this",
    "Brilliant performance"
]

negative_sentences = [
    "I hated this product",
    "This movie was terrible",
    "Very disappointing",
    "Not recommended",
    "Poor service and quality",
    "Bad experience",
    "Totally unsatisfied",
    "Absolutely awful",
    "I am unhappy with this",
    "Horrible performance"
]

# ----------------------------------------------------------
# 2) Generate 50 positive + 50 negative rows
# ----------------------------------------------------------
data = []
for _ in range(50):
    data.append([random.choice(positive_sentences), 1])  # label 1 = positive
    data.append([random.choice(negative_sentences), 0])  # label 0 = negative

# Shuffle the dataset
random.shuffle(data)

# ----------------------------------------------------------
# 3) Create DataFrame and save as CSV
# ----------------------------------------------------------
df = pd.DataFrame(data, columns=["text", "label"])
df.to_csv("synthetic_sentiment_dataset.csv", index=False)

print("Dataset created with 100 rows and saved as 'synthetic_sentiment_dataset.csv'")
print(df.head())

Dataset created with 100 rows and saved as 'synthetic_sentiment_dataset.csv'
                       text  label
0    I am unhappy with this      0
1  Poor service and quality      0
2  Poor service and quality      0
3      I hated this product      0
4      Absolutely wonderful      1


In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split

# ----------------------------------------------------------
# 1) LOAD DATASET
# ----------------------------------------------------------
df = pd.read_csv("synthetic_sentiment_dataset.csv")
texts = df["text"].values
labels = df["label"].values

# ----------------------------------------------------------
# 2) TOKENIZE TEXT
# ----------------------------------------------------------
vocab_size = 5000
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Check if key negative words are in vocabulary
print("Index of 'terrible':", tokenizer.word_index.get('terrible'))

# ----------------------------------------------------------
# 3) PAD SEQUENCES
# ----------------------------------------------------------
max_len = 20  # increased to capture full sentence
X = pad_sequences(sequences, maxlen=max_len, padding='post', truncating='post')
y = np.array(labels)

# ----------------------------------------------------------
# 4) TRAIN–TEST SPLIT
# ----------------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ----------------------------------------------------------
# 5) BUILD RNN MODEL (using LSTM for better performance)
# ----------------------------------------------------------
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len),
    LSTM(64, activation='tanh'),
    Dense(1, activation='sigmoid')
])

# ----------------------------------------------------------
# 6) COMPILE MODEL
# ----------------------------------------------------------
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

# ----------------------------------------------------------
# 7) TRAIN MODEL
# ----------------------------------------------------------
model.fit(
    X_train, y_train,
    epochs=30,         # increased epochs
    batch_size=8,      # slightly larger batch
    validation_split=0.2
)

# ----------------------------------------------------------
# 8) EVALUATE MODEL
# ----------------------------------------------------------
loss, acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", acc)

# ----------------------------------------------------------
# 9) PREDICTION ON NEW TEXT
# ----------------------------------------------------------
test_texts = [
    "RNN models remember context",
    "This movie was terrible",
    "Great product, I loved it"
]

test_seq = tokenizer.texts_to_sequences(test_texts)
test_pad = pad_sequences(test_seq, maxlen=max_len, padding='post', truncating='post')

predictions = model.predict(test_pad)

print("\n--- TEST RESULTS ---")
for txt, pred in zip(test_texts, predictions):
    print(f"Text: {txt}")
    print(f"Prediction (0 to 1): {pred[0]:.4f}")
    print("------------------------")

Index of 'terrible': 19
Epoch 1/30




[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.4531 - loss: 0.7013 - val_accuracy: 0.5625 - val_loss: 0.6893
Epoch 2/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5469 - loss: 0.6923 - val_accuracy: 0.4375 - val_loss: 0.6973
Epoch 3/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5469 - loss: 0.6893 - val_accuracy: 0.4375 - val_loss: 0.7036
Epoch 4/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5469 - loss: 0.6892 - val_accuracy: 0.4375 - val_loss: 0.7075
Epoch 5/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5469 - loss: 0.6884 - val_accuracy: 0.4375 - val_loss: 0.7068
Epoch 6/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5469 - loss: 0.6846 - val_accuracy: 0.4375 - val_loss: 0.7105
Epoch 7/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 