In [18]:
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load dataset
# Only the review text and its star rating are used, so the remaining CSV
# columns are never parsed into memory. The explicit column selection after
# read_csv keeps the original ('Text', 'Score') column order.
#
# The cleaning steps are chained so that `df` is built as a brand-new frame:
# assigning a column to a boolean-filtered slice (as a separate statement)
# triggers pandas' SettingWithCopyWarning.
df = (
    pd.read_csv("Reviews.csv", usecols=['Text', 'Score'])[['Text', 'Score']]
    .dropna()
    .loc[lambda frame: frame['Score'] != 3]  # Keep only positive(4,5) and negative(1,2)
    # Convert to binary sentiment: 1 for scores above 3, 0 otherwise
    # (vectorised comparison instead of a per-row Python lambda).
    .assign(Sentiment=lambda frame: (frame['Score'] > 3).astype('int64'))
)

# Split
# Hold out 20% of the reviews for testing.
# - random_state pins the shuffle so the split (and every metric computed from
#   it) is reproducible across kernel restarts.
# - stratify keeps the positive/negative ratio identical in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    df['Text'],
    df['Sentiment'],
    test_size=0.2,
    random_state=42,
    stratify=df['Sentiment'],
)

# Tokenization
VOCAB_SIZE = 10000  # tokenizer keeps only the most frequent words below this index
MAX_LEN = 100       # every review is padded / truncated to this many tokens

# An explicit out-of-vocabulary token makes unseen or rare words map to a
# placeholder index instead of being silently dropped from the sequence.
tokenizer = Tokenizer(num_words=VOCAB_SIZE, oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)  # vocabulary is learned from the training texts only

# Words -> integer ids
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Ragged id lists -> fixed-width integer matrices
X_train_pad = pad_sequences(X_train_seq, maxlen=MAX_LEN)
X_test_pad = pad_sequences(X_test_seq, maxlen=MAX_LEN)

# Build model
# Take the dimensions from the objects that define them so the network cannot
# drift out of sync with the preprocessing step.
vocab_size = tokenizer.num_words  # upper bound on the ids the tokenizer emits
seq_len = X_train_pad.shape[1]    # padded review length fed to the network

# Embedding -> mean over the sequence -> small dense head -> sigmoid score.
# The input shape is declared with an explicit Input layer: Embedding's
# `input_length` argument is deprecated in Keras 3, and declaring the input up
# front builds the model so `summary()` can report shapes and parameter counts.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(seq_len,), dtype="int32"),
    tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=16),
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

# Train
# `epochs` is only an upper bound: training stops once validation loss has not
# improved for two consecutive epochs, and the weights from the best epoch are
# restored so the evaluation below is not run on an over-trained model.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=2,
    restore_best_weights=True,
)
# Keep the History object so the learning curves can be inspected afterwards.
history = model.fit(
    X_train_pad,
    y_train,
    epochs=15,
    batch_size=128,
    validation_split=0.1,
    callbacks=[early_stop],
)

# Evaluate
# Score the held-out test partition, which was never used for fitting.
loss, acc = model.evaluate(X_test_pad, y_test)
print("Test Accuracy:", acc)




Epoch 1/15
[1m2958/2958[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.8710 - loss: 0.3307 - val_accuracy: 0.9216 - val_loss: 0.1918
Epoch 2/15
[1m2958/2958[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9334 - loss: 0.1705 - val_accuracy: 0.9329 - val_loss: 0.1690
Epoch 3/15
[1m2958/2958[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9389 - loss: 0.1579 - val_accuracy: 0.9364 - val_loss: 0.1621
Epoch 4/15
[1m2958/2958[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9411 - loss: 0.1523 - val_accuracy: 0.9377 - val_loss: 0.1600
Epoch 5/15
[1m2958/2958[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9430 - loss: 0.1482 - val_accuracy: 0.9385 - val_loss: 0.1583
Epoch 6/15
[1m2958/2958[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9447 - loss: 0.1450 - val_accuracy: 0.9393 - val_loss: 0.1584
Epoch 7/15
[1m2

In [27]:
# One free-text review to score, wrapped in a list so it forms a batch of one.
new_review = [
    "waste of money",
]

In [28]:
# Encode the new review the same way as the training data: same fitted
# tokenizer, and the padded width is read from the training matrix instead of
# repeating the literal, so inference cannot drift from training.
new_seq = tokenizer.texts_to_sequences(new_review)
new_pad = pad_sequences(new_seq, maxlen=X_train_pad.shape[1])

In [29]:
# Run the network on the encoded review; `pred` is indexed as [sample][output].
pred = model.predict(new_pad)

# Show the raw sigmoid output for the first (and only) review in the batch.
print(
    "Raw Prediction (sigmoid):",
    pred[0][0],
)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Raw Prediction (sigmoid): 0.32184258


In [30]:
# Turn the sigmoid score into a label: 0.5 or higher is positive, anything
# else is negative.
if pred[0][0] >= 0.5:
    final_sentiment = "Positive"
else:
    final_sentiment = "Negative"

print("Predicted Sentiment:", final_sentiment)

Predicted Sentiment: Negative
