In [1]:
# import all libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import re
import nltk
from nltk.tokenize import word_tokenize

import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, SimpleRNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import one_hot
from tensorflow.keras.utils import pad_sequences
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer




In [2]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HANNAN\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HANNAN\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HANNAN\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
print(tf.__version__)

2.15.0


In [4]:
# load dataset
df = pd.read_csv("../../../dataset/Restaurant reviews.csv")
df

Unnamed: 0,Review,Rating
0,"The ambience was good, food was quite good . h...",5
1,Ambience is too good for a pleasant evening. S...,5
2,A must try.. great food great ambience. Thnx f...,5
3,Soumen das and Arun was a great guy. Only beca...,5
4,Food is good.we ordered Kodi drumsticks and ba...,5
...,...,...
9995,Madhumathi Mahajan Well to start with nice cou...,3
9996,This place has never disappointed us.. The foo...,4.5
9997,"Bad rating is mainly because of ""Chicken Bone ...",1.5
9998,I personally love and prefer Chinese Food. Had...,4


In [5]:
df.dropna(inplace=True)

In [6]:
df.isnull().sum()

Review    0
Rating    0
dtype: int64

In [7]:
df.drop(index=df[df['Rating'] == "Like"].index, inplace=True)

In [8]:
# all rating into flot
df['Rating'] = df['Rating'].apply(float)

In [9]:
# Create target variable
# 1 = Positive, 0 = Negative Review
df['Target'] = df['Rating'].apply(lambda x: 1 if x > 3.0 else 0)

In [10]:
df.drop(columns=['Rating'], inplace=True)

In [11]:
df.head()

Unnamed: 0,Review,Target
0,"The ambience was good, food was quite good . h...",1
1,Ambience is too good for a pleasant evening. S...,1
2,A must try.. great food great ambience. Thnx f...,1
3,Soumen das and Arun was a great guy. Only beca...,1
4,Food is good.we ordered Kodi drumsticks and ba...,1


In [12]:

from nltk.corpus import stopwords

from nltk.stem import WordNetLemmatizer
stop_words = set(stopwords.words("english"))
important_words = {"not", "no", "nor", "never"}
final_stopwords = stop_words - important_words

lemmatize = WordNetLemmatizer()

# Clean the text
corpus = []
for i in range(0, len(df)):
    reviews = re.sub('[^a-zA-Z]', ' ', df['Review'].iloc[i])
    reviews = reviews.lower()
    reviews = word_tokenize(reviews, language="english")
    reviews = [lemmatize.lemmatize(word) for word in reviews if word not in final_stopwords]
    reviews = " ".join(reviews).strip()
    corpus.append(reviews)


In [13]:
corpus

['ambience good food quite good saturday lunch cost effective good place sate brunch one also chill friend parent waiter soumen da really courteous helpful',
 'ambience good pleasant evening service prompt food good good experience soumen da kudos service',
 'must try great food great ambience thnx service pradeep subroto personal recommendation penne alfredo pasta also music background amazing',
 'soumen da arun great guy behavior sincerety good food course would like visit place',
 'food good ordered kodi drumstick basket mutton biryani good thanks pradeep served well enjoyed ambience also good',
 'ambiance good service good food apradeecp subro best service food good papiya good hostess ur caption good star restaurant',
 'nice place ambience different food ordered tasty service also gud worth visit reasonable well really must visit place',
 'well reading many review finally visited place ambience good coming food crispy corn nice tawa fish ok basket biryani disappointed u biryani ok

In [14]:
# Define vocabulary size
voc_size = 10000

# Use Tokenizer instead of one_hot
tokenizer = Tokenizer(num_words=voc_size)
tokenizer.fit_on_texts(corpus)
X = tokenizer.texts_to_sequences(corpus)

In [15]:
X

[[9,
  1,
  2,
  100,
  1,
  438,
  62,
  323,
  2080,
  1,
  3,
  7090,
  616,
  11,
  14,
  503,
  34,
  1847,
  168,
  2293,
  1553,
  13,
  199,
  420],
 [9, 1, 567, 203, 5, 489, 2, 1, 1, 25, 2293, 1553, 1039, 5],
 [33,
  23,
  12,
  2,
  12,
  9,
  5352,
  5,
  2081,
  7091,
  668,
  946,
  1416,
  1306,
  161,
  14,
  70,
  1510,
  32],
 [2293, 1553, 5353, 12, 137, 1081, 7092, 1, 2, 64, 29, 16, 22, 3],
 [2, 1, 8, 600, 1061, 1783, 129, 19, 1, 155, 2081, 39, 27, 217, 9, 14, 1],
 [57,
  1,
  5,
  1,
  2,
  7093,
  7094,
  18,
  5,
  2,
  1,
  1277,
  1,
  2168,
  481,
  4442,
  1,
  149,
  17],
 [15, 3, 9, 143, 2, 8, 49, 5, 14, 1009, 82, 22, 357, 27, 13, 33, 22, 3],
 [27,
  1511,
  136,
  162,
  379,
  104,
  3,
  9,
  1,
  160,
  2,
  179,
  163,
  15,
  1307,
  72,
  119,
  1783,
  19,
  150,
  26,
  19,
  119,
  4,
  7095,
  4443,
  20,
  165,
  489,
  166,
  2081,
  7096],
 [89,
  2,
  274,
  16,
  61,
  2,
  199,
  20,
  3178,
  2081,
  1277,
  311,
  89,
  5,
  743,
  147,
  9

In [16]:
# Save tokenizer
with open("tokenizer.pkl", "wb") as f:
    pickle.dump(tokenizer, f)

In [17]:
# Pad sequences
max_length = 500
X = pad_sequences(X, maxlen=max_length)
y = df['Target']

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [18]:
# Build model
model = Sequential()
model.add(Embedding(input_dim=voc_size, output_dim=128, input_length=max_length))
model.add(SimpleRNN(128, activation="relu", return_sequences=True))
model.add(SimpleRNN(64, activation="relu"))
model.add(Dense(1, activation="sigmoid"))




In [19]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 500, 128)          1280000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 500, 128)          32896     
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 64)                12352     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 1325313 (5.06 MB)
Trainable params: 1325313 (5.06 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [20]:
# compile the model
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=['accuracy'])




In [21]:
# Setup EarlyStopping
earlystoping = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

In [22]:
# Train model
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_test, y_test),
    callbacks=[earlystoping]
)

Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


In [23]:
# Save model
model.save("simple_rnn.h5")

  saving_api.save_model(


In [24]:
# Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test)



In [25]:
# Load model and tokenizer
from tensorflow.keras.models import load_model

model = load_model("simple_rnn.h5")
with open("tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)

In [26]:
# Text preprocessing function
def preprocess_text(text):
    review = re.sub('[^a-zA-Z]', ' ', text)
    review = review.lower()
    review = word_tokenize(review, language="english")
    review = [lemmatize.lemmatize(word) for word in review if word not in final_stopwords]
    cleaned_text = " ".join(review).strip()
    encoded_review = tokenizer.texts_to_sequences([cleaned_text])
    padded_review = pad_sequences(encoded_review, maxlen=max_length)
    return padded_review

In [27]:
# Predict function
def predict_sentiment(review):
    processed_input = preprocess_text(review)
    prediction = model.predict(processed_input)
    sentiment = 'Positive' if prediction[0][0] > 0.6 else 'Negative'
    return sentiment, prediction[0][0]

In [28]:
# Example
example_review = "Hands down one of the best meals I’ve had in a long time. Everything was cooked to perfection and the flavors were out of this world."
sentiment, score = predict_sentiment(example_review)

print("Review Text:", example_review)
print("Sentiment:", sentiment)
print("Score:", score)

Review Text: Hands down one of the best meals I’ve had in a long time. Everything was cooked to perfection and the flavors were out of this world.
Sentiment: Positive
Score: 0.92771065
