In [35]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [36]:
df = pd.read_csv('./IMDB-Dataset.csv')
df.head()

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [37]:
le = LabelEncoder()
df['sentiment'] = le.fit_transform(df['sentiment'])

In [38]:
X = df['review'].values
y = df['sentiment'].values

# Tokenize text
tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')
tokenizer.fit_on_texts(X)
X_seq = tokenizer.texts_to_sequences(X)

# Pad sequences to make them equal length
X_padded = pad_sequences(X_seq, maxlen=200, padding='post', truncating='post')

In [39]:
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.2, random_state=42)

In [40]:
model = Sequential([
    Embedding(input_dim=10000, output_dim=64, input_length=X_train.shape[1]),
    SimpleRNN(64, return_sequences=False),
    Dropout(0.5),
    Dense(32, activation='tanh'),
    Dense(1, activation='sigmoid')
])



In [41]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [43]:
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=64,
    verbose=1
)

Epoch 1/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 83ms/step - accuracy: 0.4978 - loss: 0.6964 - val_accuracy: 0.5019 - val_loss: 0.6936
Epoch 2/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 79ms/step - accuracy: 0.5005 - loss: 0.6951 - val_accuracy: 0.5030 - val_loss: 0.6946
Epoch 3/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 58ms/step - accuracy: 0.5009 - loss: 0.6952 - val_accuracy: 0.5051 - val_loss: 0.6939
Epoch 4/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 68ms/step - accuracy: 0.5020 - loss: 0.6945 - val_accuracy: 0.5046 - val_loss: 0.6930
Epoch 5/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 72ms/step - accuracy: 0.5121 - loss: 0.6930 - val_accuracy: 0.5065 - val_loss: 0.6926
Epoch 6/50
[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 87ms/step - accuracy: 0.5407 - loss: 0.6873 - val_accuracy: 0.5063 - val_loss: 0.6929
Epoch 7/50
[1m6

In [44]:
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc*100:.2f}%")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 25ms/step - accuracy: 0.5116 - loss: 0.7044
Test Accuracy: 51.16%


In [46]:
sample_reviews = ["The movie was amazing!", "Worst film ever."]
sample_seq = tokenizer.texts_to_sequences(sample_reviews)
sample_pad = pad_sequences(sample_seq, maxlen=200, padding='post')

predictions = model.predict(sample_pad)
print(["positive" if p > 0.5 else "negative" for p in predictions])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 208ms/step
['negative', 'negative']
