In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, GlobalMaxPooling1D
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import LabelEncoder

In [11]:
# Load the dataset
df = pd.read_csv('./food_recipes_2.csv')

X = df['ingredient1','ingredient2','ingredient3','ingredient4','ingredient5','ingredient6','ingredient7','ingredient8','ingredient9','ingredient10']
y = df['recipe_title']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#Fill nulls with empty strings
X_train = X_train.fillna('')
X_test = X_test.fillna('')
y_train = X_train.fillna('')
y_test = X_test.fillna('')

In [12]:
# Text vectorization using TF-IDF
vectorizer = TfidfVectorizer(max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [13]:
# Define the model
model = Sequential()
model.add(Embedding(input_dim=5000, output_dim=64, input_length=X_train_vec.shape[1]))
model.add(LSTM(100, return_sequences=True))
model.add(GlobalMaxPooling1D())
model.add(Dense(50, activation='relu'))
model.add(Dense(len(df['recipe_title'].unique()), activation='softmax'))

In [14]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [15]:
# Convert labels to numerical format
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

In [16]:
# Train the model
model.fit(X_train_vec.toarray(), y_train_encoded, epochs=5, batch_size=64, validation_split=0.1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x1cdc7311910>

In [25]:
# Evaluate the model on the test set
y_test_encoded = label_encoder.fit_transform(y_test)

In [26]:
loss, accuracy = model.evaluate(X_test_vec.toarray(), y_test_encoded)

print(f'Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

Test Loss: 8.7314, Test Accuracy: 0.0007
