In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [2]:
# ==== STEP 1: Load dataset ====
df = pd.read_csv('sentiment_data.csv')
df.head()

Unnamed: 0,text,label
0,This movie was great,1
1,Terrible acting and script,0
2,"Loved it, very entertaining",1
3,Worst film of the year,0
4,Absolutely fantastic,1


In [4]:
texts = df['text'].values
texts

array(['This movie was great', 'Terrible acting and script',
       'Loved it, very entertaining', 'Worst film of the year',
       'Absolutely fantastic', 'Not my type of movie',
       'Enjoyed every minute', 'Really boring',
       'Fantastic visuals and story', 'Poorly written'], dtype=object)

In [8]:
labels = df['label'].values
labels

array([1, 0, 1, 0, 1, 0, 1, 0, 1, 0])

In [10]:
vocab_size = 10000  # Size of the hashing space
encoded_texts = [one_hot(text, vocab_size) for text in texts]
encoded_texts

[[2092, 8089, 6210, 7923],
 [2027, 6861, 9756, 2037],
 [4139, 5924, 3890, 8753],
 [1220, 9317, 943, 1127, 9500],
 [5641, 6124],
 [6828, 5254, 1442, 943, 8089],
 [8842, 4770, 9814],
 [2773, 470],
 [6124, 1784, 9756, 8850],
 [8343, 3753]]

In [11]:
# Pad sequences to ensure equal length
max_len = 100
X = pad_sequences(encoded_texts, maxlen=max_len)
y = labels
X

array([[   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0, 2092, 8089, 6210,
        7923],
       [   0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    

In [12]:
# ==== STEP 3: Train-test split ====
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
# ==== STEP 4: Build the RNN model ====
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len))
model.add(SimpleRNN(64, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # Binary output

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()




In [14]:
# ==== STEP 5: Train the model ====
model.fit(X_train, y_train, epochs=5, batch_size=32, validation_split=0.2)

Epoch 1/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.5000 - loss: 0.6950 - val_accuracy: 0.5000 - val_loss: 0.6999
Epoch 2/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.8333 - loss: 0.6807 - val_accuracy: 0.5000 - val_loss: 0.7009
Epoch 3/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step - accuracy: 1.0000 - loss: 0.6681 - val_accuracy: 0.5000 - val_loss: 0.7007
Epoch 4/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step - accuracy: 1.0000 - loss: 0.6565 - val_accuracy: 0.5000 - val_loss: 0.6995
Epoch 5/5
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step - accuracy: 1.0000 - loss: 0.6452 - val_accuracy: 0.5000 - val_loss: 0.6983


<keras.src.callbacks.history.History at 0x1a3504e24d0>

In [15]:
# ==== STEP 6: Evaluate the model ====
loss, accuracy = model.evaluate(X_test, y_test)
print(f"\nTest Accuracy: {accuracy:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step - accuracy: 0.5000 - loss: 0.6919

Test Accuracy: 0.50
