In [1]:
import re

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import SimpleRNN, Dense, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed

# Load the raw Foodpanda export; the path is relative to the notebook's
# working directory.
df = pd.read_csv("foodpanda.csv")

# Show the available columns so the names used by later cells can be checked.
column_names = list(df.columns)
print("Columns in dataset:", column_names)


Columns in dataset: ['ID', 'Area', 'City', 'Restaurant Price', 'Avg Rating', 'Total Rating', 'Food Item', 'Food Type', 'Delivery Time', 'Review']


In [2]:
# Remove rows lacking a review or a rating *before* deriving anything from
# them. Deriving first would label a missing rating as 0, because
# `NaN > 3.5` evaluates to False. Restricting the check to the two columns
# this notebook actually uses also keeps rows that are merely missing
# unrelated fields instead of discarding them.
df = df.dropna(subset=["Review", "Avg Rating"]).copy()

# Normalise the text: lower-case it, then strip every character that is not
# a-z, 0-9 or whitespace.
df["review_text"] = (
    df["Review"]
    .str.lower()
    .replace(r'[^a-z0-9\s]', '', regex=True)
)

# Binary target: 1 when the rating is strictly above 3.5, otherwise 0.
# NOTE(review): the label is taken from 'Avg Rating', not from the review
# text itself, and the recorded training run reports accuracy 1.0000 from
# the first epoch. Worth checking the class balance, e.g. with
# df["sent_label"].value_counts() - TODO confirm both classes are present.
df["sent_label"] = (df["Avg Rating"] > 3.5).astype(int)


In [3]:
# Hyper-parameters for the text encoding step.
vocab_limit = 5000   # passed to the tokenizer as num_words
seq_length = 200     # every encoded review is brought to this length

# Build the word index from the cleaned reviews.
# NOTE(review): the tokenizer is fitted on every review, i.e. before the
# train/validation/test split made later in the notebook - confirm that this
# is acceptable for the evaluation being reported.
review_corpus = df["review_text"]
tokenizer = Tokenizer(num_words=vocab_limit)
tokenizer.fit_on_texts(review_corpus)

# Encode each review as a list of word indices, then bring all of them to a
# common length so they can be stacked into one array.
encoded_reviews = tokenizer.texts_to_sequences(review_corpus)
features = pad_sequences(encoded_reviews, maxlen=seq_length)

# Targets as a plain NumPy array, aligned row-for-row with `features`.
labels = df["sent_label"].to_numpy()


In [4]:
# First cut: hold out 20% of the samples as the test set, stratified on the
# label.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)

# Second cut: take 10% of the remaining samples for validation, again
# stratified, which leaves roughly a 72 / 8 / 20 train / val / test split.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    test_size=0.1,
    stratify=y_trainval,
    random_state=42,
)

In [5]:
# Seed the Python, NumPy and backend random generators so that weight
# initialisation and batch shuffling repeat across Restart & Run All
# (on the same software/hardware stack).
set_random_seed(42)

# Embedding -> single SimpleRNN -> sigmoid unit for binary classification.
model = Sequential([
    # `input_length` is intentionally not passed: the argument is deprecated
    # in Keras 3 and the recurrent layer does not need a fixed length
    # declared up front.
    Embedding(input_dim=vocab_limit, output_dim=16),
    SimpleRNN(64, activation='tanh', return_sequences=False),
    Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train for a handful of epochs, monitoring the held-out validation split.
history = model.fit(
    X_train,
    y_train,
    epochs=5,
    batch_size=32,
    validation_data=(X_val, y_val),
    verbose=1,
)


Epoch 1/5




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 270ms/step - accuracy: 1.0000 - loss: 0.6277 - val_accuracy: 1.0000 - val_loss: 0.3282
Epoch 2/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - accuracy: 1.0000 - loss: 0.3169 - val_accuracy: 1.0000 - val_loss: 0.2166
Epoch 3/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step - accuracy: 1.0000 - loss: 0.2094 - val_accuracy: 1.0000 - val_loss: 0.1490
Epoch 4/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 1.0000 - loss: 0.1438 - val_accuracy: 1.0000 - val_loss: 0.1055
Epoch 5/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step - accuracy: 1.0000 - loss: 0.1020 - val_accuracy: 1.0000 - val_loss: 0.0772


In [None]:

# Evaluate on the held-out test split. `score` holds the compiled loss
# followed by the compiled metrics, so index 1 is the accuracy.
score = model.evaluate(x=X_test, y=y_test, verbose=0)
accuracy_report = f"Test accuracy: {score[1]:.2f}"
print(accuracy_report)
def predict_review_sentiment(review: str) -> str:
    """Classify one free-text review with the trained model.

    The review receives the same cleaning as the training text (lower-cased,
    then everything except a-z, 0-9 and whitespace removed), is encoded with
    the notebook-level ``tokenizer``, padded to ``seq_length`` and scored by
    the notebook-level ``model``.

    Args:
        review: Raw review text.

    Returns:
        A string of the form ``"Positive (Probability: 0.92)"``. The
        sentiment is ``Positive`` when the model output is >= 0.5 and
        ``Negative`` otherwise; the probability is the model output
        formatted to two decimals.
    """
    # Mirror the training-time normalisation so the tokenizer sees the same
    # kind of text it was fitted on.
    cleaned = re.sub(r'[^a-z0-9\s]', '', review.lower())
    seq = tokenizer.texts_to_sequences([cleaned])
    padded_seq = pad_sequences(seq, maxlen=seq_length)

    # verbose=0 keeps Keras from printing a progress bar on every single
    # prediction, which would otherwise clutter the cell output.
    prob = float(model.predict(padded_seq, verbose=0)[0][0])

    sentiment = 'Positive' if prob >= 0.5 else 'Negative'
    return f"{sentiment} (Probability: {prob:.2f})"
# Smoke-test the helper on a single hand-written review.
sample_review = "The food was great."
print("Review:", sample_review)

sample_sentiment = predict_review_sentiment(sample_review)
print("Sentiment:", sample_sentiment)


Test accuracy: 1.00
Review: The food was great.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Sentiment: Positive (Probability: 0.92)
