<a href="https://colab.research.google.com/github/anis-mselmi/Restaurant-Review-Sentiment-Analysis/blob/main/Restaurant_Review_Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas scikit-learn tensorflow matplotlib seaborn




In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Dense, Dropout, Embedding, Input, LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import set_random_seed, to_categorical


In [None]:
# Toy corpus: ten hand-written restaurant reviews, each kept next to its
# sentiment tag so a review and its label cannot drift out of alignment.
labelled_reviews = [
    ("The tacos were amazing!", "positive"),
    ("I hated the burger, very dry.", "negative"),
    ("Pizza was okay, not the best.", "neutral"),
    ("Loved the pasta, will order again.", "positive"),
    ("Terrible service, I won’t come back.", "negative"),
    ("The salad was fresh and tasty.", "positive"),
    ("The steak was cold and disappointing.", "negative"),
    ("Excellent food and great staff!", "positive"),
    ("The sandwich was average, nothing special.", "neutral"),
    ("Worst experience ever, very bad!", "negative"),
]

# Unzip the pairs into the two parallel columns the DataFrame is built from.
review_texts, sentiment_tags = zip(*labelled_reviews)
data = {
    'review': list(review_texts),
    'sentiment': list(sentiment_tags),
}

df = pd.DataFrame(data)
df.head()


In [None]:
# Learn the sentiment -> integer id mapping, then store the ids in a new
# 'label' column next to the original strings.
label_encoder = LabelEncoder().fit(df['sentiment'])
df['label'] = label_encoder.transform(df['sentiment'])

# Number of distinct sentiment classes; used for one-hot encoding and the
# size of the output layer.
num_classes = len(label_encoder.classes_)


In [None]:
# Hold out 30% of the reviews for evaluation; the fixed random_state keeps the
# partition identical on every run.
split_parts = train_test_split(
    df['review'],
    df['label'],
    random_state=42,
    test_size=0.3,
)
X_train, X_test, y_train, y_test = split_parts


In [None]:
max_words = 5000  # cap on vocabulary size; also the Embedding input dimension
max_len = 20      # every review is padded/truncated to this many token ids

# Build the vocabulary from the TRAINING reviews only. Fitting on the whole
# DataFrame would let test-set words leak into preprocessing and make the
# validation numbers optimistic. Words not seen during fitting are mapped to
# an explicit out-of-vocabulary token instead of being silently dropped.
tokenizer = Tokenizer(num_words=max_words, lower=True, split=" ", oov_token="<OOV>")
tokenizer.fit_on_texts(X_train)

# Text -> lists of integer word ids.
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

# Lists of ids -> fixed-width integer matrices of width max_len.
X_train_pad = pad_sequences(X_train_seq, maxlen=max_len)
X_test_pad = pad_sequences(X_test_seq, maxlen=max_len)

# Integer class ids -> one-hot rows, as required by categorical_crossentropy.
y_train_cat = to_categorical(y_train, num_classes=num_classes)
y_test_cat = to_categorical(y_test, num_classes=num_classes)


In [None]:
# Seed Python, NumPy and TensorFlow together so weight initialisation and
# dropout masks are repeatable across "Restart & Run All", matching the seeded
# train/test split. (Some GPU kernels may still be non-deterministic.)
set_random_seed(42)

# Embedding -> LSTM -> small dense head -> softmax over the sentiment classes.
# The input shape is declared with an explicit Input layer rather than the
# deprecated `input_length` argument of Embedding; this also builds the model
# immediately, so `model.summary()` works before training.
model = Sequential([
    Input(shape=(max_len,), dtype="int32"),
    Embedding(max_words, 128),
    LSTM(64, dropout=0.2, recurrent_dropout=0.2),
    Dense(32, activation="relu"),
    Dropout(0.3),
    Dense(num_classes, activation="softmax"),
])

# One-hot targets + softmax output -> categorical cross-entropy.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])


In [None]:
# Train for 10 epochs in mini-batches of 4. The held-out split is evaluated
# after every epoch, and the per-epoch metrics are kept in `history`.
history = model.fit(
    x=X_train_pad,
    y=y_train_cat,
    batch_size=4,
    epochs=10,
    verbose=1,
    validation_data=(X_test_pad, y_test_cat),
)


In [None]:
# Side-by-side learning curves: accuracy on the left, loss on the right.
# The explicit fig/axes interface avoids pyplot's hidden "current axes" state,
# and one loop draws both panels instead of two copy-pasted blocks.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# (history key, legend suffix, panel title, y-axis label) for each panel.
panels = [
    ("accuracy", "Acc", "Model Accuracy", "Accuracy"),
    ("loss", "Loss", "Model Loss", "Loss"),
]

for ax, (metric, short_name, title, y_label) in zip(axes, panels):
    ax.plot(history.history[metric], label=f"Train {short_name}")
    ax.plot(history.history[f"val_{metric}"], label=f"Val {short_name}")
    # Axis labels let the figure stand alone when the notebook is skimmed.
    ax.set(title=title, xlabel="Epoch", ylabel=y_label)
    ax.legend()

fig.tight_layout()
plt.show()


In [None]:
# Unseen reviews used as a quick sanity check of the trained model.
custom_reviews = [
    "The tacos are cold and bad",
    "I really loved the pizza, so tasty!",
    "The service was okay, nothing special"
]

# Apply the same preprocessing as the training data: ids, then fixed width.
custom_seq = tokenizer.texts_to_sequences(custom_reviews)
custom_pad = pad_sequences(custom_seq, maxlen=max_len)

# One row of class probabilities per review; take the most likely class and
# translate its integer id back to the original sentiment string.
pred_probs = model.predict(custom_pad)
pred_classes = pred_probs.argmax(axis=1)
pred_labels = label_encoder.inverse_transform(pred_classes)

for idx, review in enumerate(custom_reviews):
    sentiment = pred_labels[idx]
    print(f"Review: '{review}' --> Prediction: {sentiment}")
