In [1]:

import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Read the labelled opinions and pull the two columns used downstream
# out as plain Python lists.
df = pd.read_csv("opinions_labeled_1000.csv")
texts = df["text"].to_list()
labels = df["label"].to_list()

In [3]:
# Build the vocabulary from the corpus; at most 1000 word ids are kept and
# anything else is mapped to the "<OOV>" token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=1000)
tokenizer.fit_on_texts(texts)

# Turn every text into a list of word ids, then pad/truncate to exactly
# 20 ids with the padding zeros appended at the end.
sequences = tokenizer.texts_to_sequences(texts)
padded = pad_sequences(
    sequences,
    padding='post',
    maxlen=20,
)

In [4]:
# Hold out 20% of the padded sequences for testing; the fixed seed keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    padded,
    labels,
    random_state=42,
    test_size=0.2,
)

In [5]:
# Model definition
# Embedding -> mean over the sequence axis -> small dense head -> sigmoid.
# The input shape is declared with an explicit Input layer: the Embedding
# `input_length` argument is deprecated in Keras 3 (it only triggers a
# warning and is otherwise ignored), so it is no longer passed.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(20,)),  # 20 = maxlen used when padding the sequences
    tf.keras.layers.Embedding(input_dim=1000, output_dim=16),  # 1000 = tokenizer num_words
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # single probability output
])
# Binary cross-entropy matches the single sigmoid output unit.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])




In [6]:
# Training
# Keep the returned History object: it holds the per-epoch loss/accuracy
# curves, and assigning it stops its repr from being echoed as cell output.
# np.asarray converts the label lists to arrays without copying inputs
# that are already arrays.
history = model.fit(
    np.asarray(X_train),
    np.asarray(y_train),
    epochs=70,
    validation_data=(np.asarray(X_test), np.asarray(y_test)),
)

Epoch 1/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step - accuracy: 0.6054 - loss: 0.6894 - val_accuracy: 1.0000 - val_loss: 0.6768
Epoch 2/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9431 - loss: 0.6694 - val_accuracy: 1.0000 - val_loss: 0.6494
Epoch 3/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 0.6365 - val_accuracy: 1.0000 - val_loss: 0.6050
Epoch 4/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9960 - loss: 0.5863 - val_accuracy: 1.0000 - val_loss: 0.5425
Epoch 5/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 1.0000 - loss: 0.5133 - val_accuracy: 1.0000 - val_loss: 0.4638
Epoch 6/70
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 1.0000 - loss: 0.4332 - val_accuracy: 1.0000 - val_loss: 0.3779
Epoch 7/70
[1m25/25[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x7e8496c17250>

In [7]:
# Evaluation
pred_probs = model.predict(np.array(X_test))
# The final layer is Dense(1), so predict yields one probability per row;
# flatten and threshold in one vectorised step instead of testing
# 1-element arrays one at a time.
pred_labels = (np.asarray(pred_probs).ravel() > 0.5).astype(int)
print(classification_report(y_test, pred_labels))

# Save model and tokenizer config
model.save("sentiment_model.h5")
# The saved model is only usable on new text together with the fitted
# vocabulary, so persist the tokenizer configuration next to it.
with open("tokenizer.json", "w", encoding="utf-8") as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step




              precision    recall  f1-score   support

           0       1.00      1.00      1.00        95
           1       1.00      1.00      1.00       105

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [8]:
# Twenty hand-written review-style sentences that are tokenized, padded and
# scored by the trained model in the cells below. They are defined inline
# rather than loaded from the labelled CSV.
test_sentences = [
    "I absolutely love this product!",
    "Terrible experience, I will never buy this again.",
    "Great quality and fast delivery.",
    "Completely useless and a waste of money.",
    "Highly recommend it to everyone!",
    "Awful packaging and very slow shipping.",
    "This is the best purchase I made this year!",
    "Not satisfied, the item broke after two days.",
    "Super helpful and kind customer service!",
    "Disappointed with how it turned out.",
    "Amazing quality, exceeded my expectations!",
    "The product stopped working after a week.",
    "Really fast shipping and nice packaging.",
    "Customer service was not helpful at all.",
    "It's okay, nothing special.",
    "Absolutely fantastic, I'm impressed!",
    "Won’t be buying from this seller again.",
    "Love it! Will order again soon.",
    "Worst purchase I've ever made.",
    "Top-notch quality and very reliable."
]


In [9]:
# Encode the new sentences with the already-fitted tokenizer and pad them
# the same way as the training data (20 ids, zeros appended at the end).
test_sequences = tokenizer.texts_to_sequences(test_sentences)
test_padded = pad_sequences(
    test_sequences,
    padding='post',
    maxlen=20,
)

# One row of model output per sentence.
preds = model.predict(test_padded)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step


In [10]:
print("\nPredictions on 20 new sentences:")
# Walk sentences and prediction rows together; each row's first entry is
# the score that is compared against the 0.5 cut-off.
for sent, row in zip(test_sentences, preds):
    score = row[0]
    if score > 0.5:
        label = "POSITIVE"
    else:
        label = "NEGATIVE"
    print(f"{label} ({score:.2f}): {sent}")


Predictions on 20 new sentences:
POSITIVE (1.00): I absolutely love this product!
POSITIVE (0.85): Terrible experience, I will never buy this again.
POSITIVE (1.00): Great quality and fast delivery.
NEGATIVE (0.00): Completely useless and a waste of money.
POSITIVE (0.91): Highly recommend it to everyone!
NEGATIVE (0.08): Awful packaging and very slow shipping.
POSITIVE (1.00): This is the best purchase I made this year!
NEGATIVE (0.00): Not satisfied, the item broke after two days.
POSITIVE (0.80): Super helpful and kind customer service!
NEGATIVE (0.01): Disappointed with how it turned out.
POSITIVE (1.00): Amazing quality, exceeded my expectations!
NEGATIVE (0.00): The product stopped working after a week.
POSITIVE (0.97): Really fast shipping and nice packaging.
NEGATIVE (0.00): Customer service was not helpful at all.
POSITIVE (0.67): It's okay, nothing special.
POSITIVE (0.99): Absolutely fantastic, I'm impressed!
POSITIVE (0.89): Won’t be buying from this seller again.
POSITIVE