In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.layers import Input, Embedding, Flatten, Dense, Concatenate, Dropout
from keras.models import Model
from keras.optimizers import Adam

In [2]:
# Load the dataset
data = pd.read_csv('../dataset/preprocessed_data.csv')

In [3]:
# Encode the categorical features
encoder = LabelEncoder()
data['food_types'] = encoder.fit_transform(data['food_types'])

In [4]:
# Standardize the numerical features
scaler = StandardScaler()
num_cols = ['minutes', 'n_steps', 'n_ingredients', 'calories', 'total_fat', 'sugar', 'sodium', 'protein', 'saturated_fat', 'carbohydrates']
data[num_cols] = scaler.fit_transform(data[num_cols])

In [5]:
# Create the user-item interaction matrix
n_users = data['user_id'].nunique()
n_items = data['recipe_id'].nunique()
user_item_matrix = np.zeros((n_users, n_items))
for row in data.itertuples():
    user_item_matrix[row[1]-1, row[14]-1] = row[15]

IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices

In [None]:
# Define the MLP model architecture
user_input = Input(shape=(1,))
user_embedding = Embedding(n_users, 64, input_length=1)(user_input)
user_embedding = Flatten()(user_embedding)

item_input = Input(shape=(1,))
item_embedding = Embedding(n_items, 64, input_length=1)(item_input)
item_embedding = Flatten()(item_embedding)

numeric_input = Input(shape=(10,))
dense_1 = Dense(128, activation='relu')(numeric_input)
dense_1 = Dropout(0.5)(dense_1)

concat = Concatenate()([user_embedding, item_embedding, dense_1])
dense_2 = Dense(64, activation='relu')(concat)
dense_2 = Dropout(0.5)(dense_2)

output = Dense(1)(dense_2)

model = Model(inputs=[user_input, item_input, numeric_input], outputs=output)
model.compile(loss='mse', optimizer=Adam(lr=0.001))

# Train the MLP model
model.fit(x=[user_item_matrix[:, 0], user_item_matrix[:, 1], data[num_cols]], y=user_item_matrix[:, 14], batch_size=64, epochs=10, validation_split=0.1)

In [None]:
# Use the MLP model to suggest similar recipe_ids
def suggest_similar_recipes(recipe_id, n=5):
    item_idx = recipe_id - 1
    item_vec = np.zeros((1, 1))
    item_vec[0, 0] = item_idx

    user_ratings = model.predict([np.arange(n_users), np.repeat(item_idx, n_users), np.tile(data.loc[item_idx, num_cols].values, (n_users, 1))])
    user_ratings = user_ratings.flatten()

    top_users = user_ratings.argsort()[-n-1:-1]
    top_recipes = []
    for user in top_users:
        top_recipe_idx = np.argmax(user_item_matrix[user, :])
        top_recipes.append(top_recipe_idx+1)
    return top_recipes