In [None]:
# Keras and TensorFlow imports
import tensorflow as tf
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D
from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.python.keras.preprocessing.text import Tokenizer
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

# Data processing imports
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Evaluation and Visualization imports
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt


# Data preprocessing 

In [30]:
reviews = pd.read_csv('merged_reviews.csv')

In [31]:
reviews['review_text'] = reviews['review_text'].str.lower()
reviews['review_text'] = reviews['review_text'].str.replace('[^\w\s]', '')
reviews['review_text'] = reviews['review_text'].str.replace('\s+', ' ')
reviews['review_text'] = reviews['review_text'].str.strip()
reviews.head()

Unnamed: 0,review_text,sentiment
0,sauce vipo infecte piquante sans aucun got ni ...,negative
1,la cuisine est mdiocre la sauce vipo est sans ...,negative
2,menu,positive
3,jai vraiment aim cette place le service est ch...,positive
4,cozy place good burgers price pay chicken twis...,positive


In [32]:
from nltk.corpus import stopwords

# Load stopwords for English, French, and Arabic
STOPWORDS_EN = set(stopwords.words('english'))
STOPWORDS_FR = set(stopwords.words('french'))
STOPWORDS_AR = set(stopwords.words('arabic'))

# Custom function to remove stopwords
def remove_stopwords(text, language='english'):
    """Remove stopwords based on the specified language."""
    if language == 'english':
        stopwords_set = STOPWORDS_EN
    elif language == 'french':
        stopwords_set = STOPWORDS_FR
    elif language == 'arabic':
        stopwords_set = STOPWORDS_AR
    else:
        stopwords_set = STOPWORDS_EN  # Default to English if language is unknown

    # Remove stopwords from text
    return " ".join([word for word in str(text).split() if word not in stopwords_set])

In [33]:
# Apply the function to the DataFrame
reviews['review_text'] = reviews['review_text'].apply(lambda text: remove_stopwords(text, language='english')) 
reviews['review_text'] = reviews['review_text'].apply(lambda text: remove_stopwords(text, language='french'))
reviews['review_text'] = reviews['review_text'].apply(lambda text: remove_stopwords(text, language='arabic'))
reviews.head()

Unnamed: 0,review_text,sentiment
0,sauce vipo infecte piquante sans aucun got ni ...,negative
1,cuisine mdiocre sauce vipo sans vouloir paratr...,negative
2,menu,positive
3,jai vraiment aim cette place service chaleureu...,positive
4,cozy place good burgers price pay chicken twis...,positive


# Model Architecture

In [34]:
# Assuming your data is in the 'reviews' DataFrame and contains the columns 'review_text' and 'sentiment'

# 1. Tokenize the text
tokenizer = Tokenizer(num_words=10000, oov_token='<OOV>')  # Limit to top 10,000 words, <OOV> for out-of-vocabulary words
tokenizer.fit_on_texts(reviews['review_text'])

# 2. Convert text to sequences (integer encoding)
X = tokenizer.texts_to_sequences(reviews['review_text'])

# 3. Pad the sequences to ensure they all have the same length
max_length = 250  # This can be adjusted depending on the average length of your reviews
X_padded = pad_sequences(X, padding='post', maxlen=max_length)

# 4. Encode the sentiment labels (binary classification: 0 or 1)
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(reviews['sentiment'])  # Sentiment should be ['positive', 'negative']

# 5. Split into training and testing sets
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_padded, y, test_size=0.25, random_state=42)

test_food = pd.read_csv('test_food.csv')

In [35]:
print(X_train.shape)  # Expected shape: (num_samples, max_sequence_length)
print(y_train.shape)  # Expected shape: (num_samples,)

(6486, 250)
(6486,)


In [36]:
# Define the LSTM model
model = Sequential()

# Add the Embedding layer
model.add(Embedding(input_dim=5000, output_dim=128, input_length=250))  # `input_length=250` matches the padded sequence length

# Add a SpatialDropout1D layer for regularization
model.add(SpatialDropout1D(0.2))

# Add the LSTM layer
model.add(LSTM(100, dropout=0.2, recurrent_dropout=0.2))  # LSTM with 100 units

# Add the output Dense layer
model.add(Dense(1, activation='sigmoid'))  # Sigmoid activation for binary classification

# Print the model summary
model.summary()

# Compile the model
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])



In [26]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Callbacks
checkpoint = ModelCheckpoint('best_lstm_model.keras', monitor='val_loss', save_best_only=True, mode='min', verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)

# Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test),
                    callbacks=[checkpoint, early_stopping])

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test accuracy: {accuracy:.4f}")


Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node sequential_4_1/embedding_4_1/GatherV2 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py", line 18, in <module>

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\traitlets\config\application.py", line 1075, in launch_instance

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelapp.py", line 739, in start

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\tornado\platform\asyncio.py", line 205, in start

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 607, in run_forever

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\asyncio\base_events.py", line 1919, in _run_once

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\asyncio\events.py", line 80, in _run

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 534, in process_one

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel\ipkernel.py", line 362, in execute_request

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel\kernelbase.py", line 778, in execute_request

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel\ipkernel.py", line 449, in do_execute

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\ipykernel\zmqshell.py", line 549, in run_cell

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3075, in run_cell

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3130, in _run_cell

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\IPython\core\async_helpers.py", line 128, in _pseudo_sync_runner

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3334, in run_cell_async

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3517, in run_ast_nodes

  File "C:\Users\zackb\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3577, in run_code

  File "C:\Users\zackb\AppData\Local\Temp\ipykernel_19640\2142228734.py", line 8, in <module>

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 368, in fit

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 216, in function

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 129, in multi_step_on_iterator

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 110, in one_step_on_data

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend\tensorflow\trainer.py", line 56, in train_step

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\layers\layer.py", line 899, in __call__

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\models\sequential.py", line 213, in call

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\models\functional.py", line 182, in call

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\ops\function.py", line 171, in _run_through_graph

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\models\functional.py", line 632, in call

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\layers\layer.py", line 899, in __call__

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 117, in error_handler

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\ops\operation.py", line 46, in __call__

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 156, in error_handler

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\layers\core\embedding.py", line 140, in call

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\ops\numpy.py", line 5239, in take

  File "c:\Users\zackb\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\backend\tensorflow\numpy.py", line 2063, in take

indices[29,5] = 9185 is not in [0, 5000)
	 [[{{node sequential_4_1/embedding_4_1/GatherV2}}]] [Op:__inference_multi_step_on_iterator_25503]