In [None]:
import pandas as pd
import string
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import numpy as np


In [None]:
# 1. Load the labelled review sentences.
# The file is decoded as latin1, and on_bad_lines='skip' tells pandas to drop
# malformed rows rather than raise.
df = pd.read_csv(
    "reviews for training.csv",
    encoding="latin1",
    on_bad_lines='skip',
)

In [None]:
# 2. Clean columns and preprocess text
# Normalise the header names so the 'sentence' / 'label' lookups below do not
# depend on stray whitespace or capitalisation in the CSV header.
df.columns = df.columns.str.strip().str.lower()

# Lower-case every sentence and delete all ASCII punctuation characters.
# A translation table is used instead of the regex f"[{string.punctuation}]":
# inside that character class the sequence "\]" is parsed as an escaped "]",
# so the backslash itself was never removed from the text.
df['clean_sentence'] = (
    df['sentence']
    .str.lower()
    .str.translate(str.maketrans("", "", string.punctuation))
)

# Missing sentences stay NaN through the .str accessors; drop those rows along
# with rows that have no label.
df = df.dropna(subset=['clean_sentence', 'label'])

In [None]:
# 3. Split into train and test
# 20% of the rows are held out; random_state pins the shuffle so the split is
# the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    df['clean_sentence'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

In [None]:
# 4. Tokenize and pad sequences
# Length (in tokens) that every sequence is padded or truncated to below.
max_length = 100

# The vocabulary is fitted on the training split only, capped at num_words,
# with "<OOV>" as the out-of-vocabulary token.
tokenizer = Tokenizer(oov_token="<OOV>", num_words=10000)
tokenizer.fit_on_texts(X_train)

In [None]:
# Training sentences -> lists of integer token ids.
X_train_seq = tokenizer.texts_to_sequences(X_train)

# Pad / truncate at the end of each sequence so all rows have max_length ids.
X_train_pad = pad_sequences(
    X_train_seq,
    maxlen=max_length,
    padding='post',
    truncating='post',
)

In [None]:
# Test sentences are encoded with the tokenizer fitted on the training split.
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Same padding / truncation settings as the training sequences.
X_test_pad = pad_sequences(
    X_test_seq,
    maxlen=max_length,
    padding='post',
    truncating='post',
)


In [None]:
# 5. Encode labels
# Learn the label -> integer mapping from the training labels only, then apply
# that same mapping to both splits.
le = LabelEncoder().fit(y_train)
y_train_enc = le.transform(y_train)
y_test_enc = le.transform(y_test)

In [None]:
# 6. Build and compile the model
# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable after a kernel restart.
tf.keras.utils.set_random_seed(42)

model = tf.keras.Sequential([
    # Explicit input spec replaces Embedding's deprecated `input_length`
    # argument, and lets model.summary() report concrete output shapes.
    tf.keras.Input(shape=(max_length,)),
    # input_dim must stay equal to the Tokenizer's num_words (10000).
    tf.keras.layers.Embedding(input_dim=10000, output_dim=16),
    # Average the token embeddings into one fixed-size vector per sentence.
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(16, activation='relu'),
    # Single sigmoid unit: the model outputs one probability per sentence,
    # which matches the binary_crossentropy loss below.
    tf.keras.layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()




In [None]:
# 7. Train model
# NOTE(review): the held-out test split doubles as the validation set here, so
# the val_* numbers logged per epoch are the same data reported as "test"
# results in the evaluation cells.
history = model.fit(
    x=X_train_pad,
    y=y_train_enc,
    batch_size=32,
    epochs=10,
    validation_data=(X_test_pad, y_test_enc),
)


Epoch 1/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.8224 - loss: 0.4038 - val_accuracy: 0.9141 - val_loss: 0.2189
Epoch 2/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9256 - loss: 0.1950 - val_accuracy: 0.9219 - val_loss: 0.2059
Epoch 3/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9343 - loss: 0.1715 - val_accuracy: 0.9190 - val_loss: 0.2107
Epoch 4/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.9400 - loss: 0.1613 - val_accuracy: 0.9222 - val_loss: 0.2097
Epoch 5/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.9436 - loss: 0.1514 - val_accuracy: 0.9167 - val_loss: 0.2206
Epoch 6/10
[1m2500/2500[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.9442 - loss: 0.1495 - val_accuracy: 0.9198 - val_loss: 0.2162
Epoch 7/10
[1m

In [None]:
# 8. Evaluate model
# evaluate() yields the loss followed by each compiled metric (accuracy only).
loss, accuracy = model.evaluate(x=X_test_pad, y=y_test_enc)
print(f"Test Accuracy: {accuracy:.4f}")

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9199 - loss: 0.2206
Test Accuracy: 0.9177


In [None]:
# 9. Evaluate the model on test data
# NOTE(review): this repeats the evaluation from step 8 on the same arrays;
# only the leading blank line in the printed message differs.
loss, accuracy = model.evaluate(x=X_test_pad, y=y_test_enc)
print(f"\nTest Accuracy: {accuracy:.4f}")

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9199 - loss: 0.2206

Test Accuracy: 0.9177


In [None]:
# 10. Predict on test data (probabilities)
# Raw sigmoid outputs of the model for every padded test sequence.
y_pred_prob = model.predict(x=X_test_pad)

[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step


In [None]:
# 11. Convert probabilities to binary predictions (threshold 0.5)
# Collapse to one dimension first, then mark strictly-greater-than-0.5 as 1
# and everything else as 0.
y_pred = (y_pred_prob.ravel() > 0.5).astype(int)

In [None]:
# 12. Calculate precision, recall, F1 score
# Each scorer takes (true labels, predicted labels); evaluate them in turn.
precision, recall, f1 = (
    scorer(y_test_enc, y_pred)
    for scorer in (precision_score, recall_score, f1_score)
)

print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")

Precision: 0.9100
Recall:    0.9278
F1 Score:  0.9188


In [None]:
# NOTE(review): this cell recomputes the step-12 metrics without printing
# them, and the names imported here are already imported in the first cell.
from sklearn.metrics import f1_score, precision_score, recall_score

f1 = f1_score(y_test_enc, y_pred)
recall = recall_score(y_test_enc, y_pred)
precision = precision_score(y_test_enc, y_pred)

In [None]:
# 13. Optional: Print full classification report
# Per-class precision / recall / F1 / support for the encoded test labels.
print(
    "\nClassification Report:\n",
    classification_report(y_true=y_test_enc, y_pred=y_pred),
)



Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.91      0.92      9965
           1       0.91      0.93      0.92     10035

    accuracy                           0.92     20000
   macro avg       0.92      0.92      0.92     20000
weighted avg       0.92      0.92      0.92     20000

