# Libraries

In [1]:
import pandas as pd
import csv

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
# Fetch the NLTK data packages this notebook asks for: the stop-word corpus
# and the "punkt" tokenizer data. Each call logs its progress to the cell
# output; the value of the last call is what the cell displays.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

# Dataset

In [3]:
# Read the scraped reviews CSV into a DataFrame and show its row count
df = pd.read_csv(filepath_or_buffer="/content/scrapped_reviews_data_v1.csv")
df.shape[0]

51

In [4]:
# Trial filter: keep only reviews with at least 10 whitespace-separated words,
# then show how many rows remain
words_per_review = df['review'].str.split().str.len()
df = df.loc[words_per_review >= 10]
df.shape[0]

32

# Preprocessing

In [5]:
# Build the (review, sentiment) training file from the raw scraped CSV.
#
# For every data row, column 0 is read as the review text and column 1 as the
# numeric rating. Reviews shorter than MIN_WORDS words are dropped, the rating
# is thresholded into a "positive"/"negative" label, and the result is written
# to OUTPUT_CSV with a "review,sentiment" header.
INPUT_CSV = "/content/scrapped_reviews_data_v1.csv"
# Written to the same absolute location the loading cell reads from, so the
# two cells agree regardless of the kernel's working directory.
OUTPUT_CSV = "/content/preprocessed_v1.csv"
MIN_WORDS = 10              # minimum number of whitespace-separated words
POSITIVE_MIN_RATING = 3.0   # ratings >= this value are labelled "positive"

preprocessed_data = []
skipped_unrated = 0

# newline="" is what the csv module requires so that quoted fields containing
# line breaks are parsed correctly; the encoding is pinned so the result does
# not depend on the platform default.
with open(INPUT_CSV, "r", newline="", encoding="utf-8") as f:
    reader = csv.reader(f)
    header = next(reader, None)  # skip the header row (None if the file is empty)
    for row in reader:
        # Blank or truncated lines have no review/rating pair to read.
        if len(row) < 2:
            continue
        review, rating = row[0], row[1]

        # Drop reviews that are too short to be useful.
        if len(review.split()) < MIN_WORDS:
            continue

        try:
            rating = float(rating)
        except ValueError:
            rating = None
        # A missing/unparseable (or NaN) rating says nothing about sentiment.
        # Labelling it "negative" would inject wrong labels into the training
        # data, so such rows are skipped and counted instead.
        if rating is None or rating != rating:
            skipped_unrated += 1
            continue

        # Sentiment is assigned on the basis of the rating score.
        sentiment = "positive" if rating >= POSITIVE_MIN_RATING else "negative"
        preprocessed_data.append([review, sentiment])

if skipped_unrated:
    print("Skipped", skipped_unrated, "review(s) without a numeric rating")

# Save the preprocessed data in a CSV file.
with open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(["review", "sentiment"])
    writer.writerows(preprocessed_data)


# Model Creation From Scratch

Libraries

In [29]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

Load Preprocessed Dataset

In [30]:
# Read the preprocessed (review, sentiment) CSV back in and preview the first rows
data = pd.read_csv(filepath_or_buffer="/content/preprocessed_v1.csv")
data.head(n=5)

Unnamed: 0,review,sentiment
0,Excelente sucursal de la tradicional cadena de...,positive
1,"Tbh, I haven't eat at Denny's in ages. My mom ...",negative
2,"As one of the only 24 hour open places in SF, ...",negative
3,The food prices and quality is fair agreeable ...,positive
4,My sister took her daughter to Denny's in Meno...,positive


Label Encoding

In [31]:
# Label encode the target column: "negative" -> 0, "positive" -> 1.
#
# The encoder is fitted on the fixed list of class names rather than on the
# column itself, and the column is only transformed while it still holds text.
# This keeps the cell safe to re-run: fitting on an already-encoded column
# would make the encoder learn the classes 0/1, and the later
# encoder.inverse_transform(...) call would then return numbers instead of
# the sentiment strings.
encoder = LabelEncoder().fit(["negative", "positive"])
if not pd.api.types.is_numeric_dtype(data['sentiment']):
    data['sentiment'] = encoder.transform(data['sentiment'])

Preparing data for training

In [32]:
# Split the data into training and testing sets (80% / 20%).
# The seed is fixed so the same rows land in each split on every run; without
# it the split, and therefore the reported test accuracy, changes per run.
SPLIT_SEED = 42
train_texts, test_texts, train_labels, test_labels = train_test_split(
    data['review'],
    data['sentiment'],
    test_size=0.2,
    random_state=SPLIT_SEED,
)

# Convert the review text into integer sequences. The tokenizer vocabulary is
# learned from the training texts only and capped at the 10000 most frequent
# words.
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=10000)
tokenizer.fit_on_texts(train_texts)

# Pad/truncate every sequence to exactly 200 tokens so the batches have a
# uniform shape. The raw texts keep their own names (train_texts/test_texts);
# train_data/test_data hold only the padded arrays used by the later cells.
train_data = tf.keras.preprocessing.sequence.pad_sequences(
    tokenizer.texts_to_sequences(train_texts), maxlen=200
)
test_data = tf.keras.preprocessing.sequence.pad_sequences(
    tokenizer.texts_to_sequences(test_texts), maxlen=200
)

Model Architecture

In [33]:
# Assemble the network one layer at a time:
# embedding -> bidirectional LSTM -> dense ReLU -> single sigmoid output
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(10000, 128))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)))
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Configure training: Adam optimizer, binary cross-entropy loss, accuracy metric
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Training

In [27]:
# Fit the model for 20 epochs; the test split is passed as validation data
history = model.fit(
    x=train_data,
    y=train_labels,
    epochs=20,
    validation_data=(test_data, test_labels),
)

# Score the fitted model on the test split and report loss and accuracy
evaluation = model.evaluate(x=test_data, y=test_labels)
test_loss, test_accuracy = evaluation
for metric_name, metric_value in (("Test Loss:", test_loss), ("Test Accuracy:", test_accuracy)):
    print(metric_name, metric_value)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Test Loss: 0.3705103099346161
Test Accuracy: 0.8571428656578064


Testing the trained model's predictions on sample review texts

In [28]:
# Use the trained model to make predictions on new data
new_reviews = [
    "Tbh, I haven't eat at Denny's in ages. My mom and I decided to eat dinner here after our mini-shopping spree for Christmas. The ambiance has changed significantly but it brings me back to my childhood of eating Denny's at this particular location. I'm glad they're able to survive and thrive during the pandemic. It wasn't a busy and were seated immediately. We were able to choose our spot and sat closer to the kitchen and hallway to the restroom. I ordered the Plate Lickin' Fried Chicken with sides of red skin potatoes and steamed vegetables. My mom ordered the country fried steak with mashed potatoes and a side salad. Since we were in the mood for dinner but still wanted a small taste of breakfast, we ordered a side stack of pancakes. (If you don't want a full breakfast plate, just make sure to read carefully and look for the sides on the lower page of the menu). They also have Coke Zero in case you still want soda but calorie free, with refills. Props to Ryan for his above and beyond skills at accommodating us. Our orders arrived within 10 minutes. The Plate Lickin Fried Chicken had two breaded fried chicken breasts and blended well with their cream sauce and sides. The chicken was still crispy and tender. Surprisingly their food is not as greasy as I remembered and still had room for pancakes. Their pancakes are hella fluffy, moist and matches with their pancake syrup. Please note that upon finishing your meal, be sure to pay your tab at the cashier towards the entrance. I love their cute Christmas tree.",
    "This restaurant has good food",
    "try burger of dooney's, its quite good"
]

# Vectorise every review with the tokenizer fitted on the training texts and
# pad to the same length (200) that was used for training.
new_sequences = tokenizer.texts_to_sequences(new_reviews)
new_padded = tf.keras.preprocessing.sequence.pad_sequences(new_sequences, maxlen=200)

# One predict call for the whole batch instead of one call per review.
prediction = model.predict(new_padded)

# The model emits one sigmoid score per review as a column of shape (n, 1).
# Round each score to 0/1 and flatten to 1-D, which is the shape
# LabelEncoder.inverse_transform expects, before mapping back to the strings.
prediction_label = encoder.inverse_transform(prediction.round().astype(int).ravel())

for label in prediction_label:
    print("Sentiment:", label)

Sentiment: negative
Sentiment: positive
Sentiment: positive
