In [1]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense

In [2]:
# Load dataset
# Build the labelled frame in one chain so every step returns a fresh
# DataFrame. Adding `Sentiment` by item assignment to a boolean-filtered
# slice can raise SettingWithCopyWarning on pandas without copy-on-write.
df = (
    pd.read_csv("Reviews.csv")[['Text', 'Score']]
    .dropna()                                  # drop rows missing text or score
    .loc[lambda frame: frame['Score'] != 3]    # Remove neutral
    # Binary label: 1 when Score > 3, otherwise 0 (vectorised, no row-wise apply).
    .assign(Sentiment=lambda frame: (frame['Score'] > 3).astype(int))
)

In [3]:
# Split
# Hold out 20% of the rows for testing.
# - random_state pins the shuffle so the partition (and every metric computed
#   from it) is reproducible after a kernel restart.
# - stratify keeps the positive/negative ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    df['Text'],
    df['Sentiment'],
    test_size=0.2,
    random_state=42,
    stratify=df['Sentiment'],
)

In [4]:
# Tokenization
# The vocabulary is fitted on the training texts only; the same fitted
# tokenizer then encodes both partitions into lists of integer word indices.
tokenizer = Tokenizer(num_words=10000)
tokenizer.fit_on_texts(X_train)
X_train_seq, X_test_seq = (
    tokenizer.texts_to_sequences(texts) for texts in (X_train, X_test)
)

In [5]:
# Padding
# Bring every encoded review to exactly `maxlen` tokens. 'pre' is the Keras
# default for both padding and truncating; it is spelled out here so the
# behaviour is visible at the call site.
maxlen = 150
pad_kwargs = dict(maxlen=maxlen, padding='pre', truncating='pre')
X_train_pad = pad_sequences(X_train_seq, **pad_kwargs)
X_test_pad = pad_sequences(X_test_seq, **pad_kwargs)

In [6]:
# Model with BiLSTM
# The input shape is declared with an explicit Input layer instead of the
# Embedding `input_length` argument, which Keras 3 deprecates. Declaring the
# input also builds the model at construction time, so model.summary() can
# report output shapes and parameter counts before training.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(maxlen,)),                  # one padded review per row
    Embedding(input_dim=10000, output_dim=64),        # input_dim matches the tokenizer's num_words
    Bidirectional(LSTM(64, return_sequences=False)),  # keep only the final state of each direction
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # Binary classification
])



In [7]:
# Compile and Train
# Binary cross-entropy pairs with the single sigmoid output unit.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

# The last 10% of the training data is held out for per-epoch validation.
fit_options = {"epochs": 5, "batch_size": 128, "validation_split": 0.1}
model.fit(X_train_pad, y_train, **fit_options)

Epoch 1/5
684/2958 ━━━━━━━━━━━━━━━━━━━━ 16:18 430ms/step - accuracy: 0.8841 - loss: 0.3117


KeyboardInterrupt



In [8]:
# Evaluate
# evaluate() returns the loss followed by the compiled metrics (accuracy here),
# measured on the held-out test partition.
evaluation = model.evaluate(X_test_pad, y_test)
loss, acc = evaluation
print("Test Accuracy:", acc)

3287/3287 ━━━━━━━━━━━━━━━━━━━━ 111s 33ms/step - accuracy: 0.9252 - loss: 0.1894
Test Accuracy: 0.9250591993331909
