In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, GlobalMaxPooling1D
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder




In [2]:
# Load the dataset
df = pd.read_csv('./food_recipes.csv')

X = df['ingredients']
y = df['recipe_title']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Fill nulls with empty strings
X_train = X_train.fillna('')
X_test = X_test.fillna('')
y_train = y_train.fillna('')  
y_test = y_test.fillna('')

In [3]:
# Text vectorization using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [4]:
# Define the model
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=64, input_length=X_train_vec.shape[1]))
# model.add(LSTM(100, return_sequences=True))
# model.add(GlobalMaxPooling1D())
model.add(Dense(50, activation='relu'))
model.add(Dense(len(df['recipe_title'].unique()), activation='softmax'))




In [5]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [6]:
# Convert labels to numerical format
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

In [7]:
# Train the model
model.fit(X_train_vec.toarray(), y_train_encoded, epochs=5, batch_size=64, validation_split=0.1)

Epoch 1/5




InvalidArgumentError: Graph execution error:

Detected at node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "E:\Apps\Anaconda\Lib\site-packages\ipykernel_launcher.py", line 17, in <module>

  File "E:\Apps\Anaconda\Lib\site-packages\traitlets\config\application.py", line 992, in launch_instance

  File "E:\Apps\Anaconda\Lib\site-packages\ipykernel\kernelapp.py", line 736, in start

  File "E:\Apps\Anaconda\Lib\site-packages\tornado\platform\asyncio.py", line 195, in start

  File "E:\Apps\Anaconda\Lib\asyncio\base_events.py", line 607, in run_forever

  File "E:\Apps\Anaconda\Lib\asyncio\base_events.py", line 1922, in _run_once

  File "E:\Apps\Anaconda\Lib\asyncio\events.py", line 80, in _run

  File "E:\Apps\Anaconda\Lib\site-packages\ipykernel\kernelbase.py", line 516, in dispatch_queue

  File "E:\Apps\Anaconda\Lib\site-packages\ipykernel\kernelbase.py", line 505, in process_one

  File "E:\Apps\Anaconda\Lib\site-packages\ipykernel\kernelbase.py", line 412, in dispatch_shell

  File "E:\Apps\Anaconda\Lib\site-packages\ipykernel\kernelbase.py", line 740, in execute_request

  File "E:\Apps\Anaconda\Lib\site-packages\ipykernel\ipkernel.py", line 422, in do_execute

  File "E:\Apps\Anaconda\Lib\site-packages\ipykernel\zmqshell.py", line 546, in run_cell

  File "E:\Apps\Anaconda\Lib\site-packages\IPython\core\interactiveshell.py", line 3024, in run_cell

  File "E:\Apps\Anaconda\Lib\site-packages\IPython\core\interactiveshell.py", line 3079, in _run_cell

  File "E:\Apps\Anaconda\Lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner

  File "E:\Apps\Anaconda\Lib\site-packages\IPython\core\interactiveshell.py", line 3284, in run_cell_async

  File "E:\Apps\Anaconda\Lib\site-packages\IPython\core\interactiveshell.py", line 3466, in run_ast_nodes

  File "E:\Apps\Anaconda\Lib\site-packages\IPython\core\interactiveshell.py", line 3526, in run_code

  File "C:\Users\Edgard\AppData\Local\Temp\ipykernel_31752\181455356.py", line 2, in <module>

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1807, in fit

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1401, in train_function

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1384, in step_function

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1373, in run_step

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1151, in train_step

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\engine\training.py", line 1209, in compute_loss

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\engine\compile_utils.py", line 277, in __call__

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\losses.py", line 143, in __call__

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\losses.py", line 270, in call

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\losses.py", line 2454, in sparse_categorical_crossentropy

  File "E:\Apps\Anaconda\Lib\site-packages\keras\src\backend.py", line 5775, in sparse_categorical_crossentropy

logits and labels must have the same first dimension, got logits shape [73728,6744] and labels shape [64]
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_1034]

In [8]:
# Evaluate the model on the test set
y_test_encoded = label_encoder.fit_transform(y_test)

In [9]:
loss, accuracy = model.evaluate(X_test_vec.toarray(), y_test_encoded)

print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

ValueError: Data cardinality is ambiguous:
  x sizes: 10
  y sizes: 1349
Make sure all arrays contain the same number of samples.