In [2]:
pip install tensorflow_recommenders

Collecting tensorflow_recommenders
  Downloading tensorflow_recommenders-0.7.3-py3-none-any.whl (96 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/96.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━[0m [32m92.2/96.2 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.2/96.2 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow_recommenders
Successfully installed tensorflow_recommenders-0.7.3


In [25]:
import tensorflow as tf
import tensorflow_recommenders as tfrs
import pandas as pd
import numpy as np
import pickle

# Load the recipe table and the list of known ingredients.
# Columns read from `df` further down: 'recipe_id', 'Title', 'Simplified Ingredients',
# 'Calories' and 'Ingredient Groups'. `ingredients_df` supplies the 'Ingredient' column
# that user-entered allergens/ingredients are validated against.
df = pd.read_csv('sabi_kali_with_ids.csv')
ingredients_df = pd.read_csv('UniqueIngredients.csv')

# Function to calculate daily calorie needs based on height, weight, age and gender
def calculate_calories(height_cm, weight_kg, age, gender, activity_factor=1.2):
    """Estimate daily calorie needs as basal metabolic rate times an activity multiplier.

    The coefficients look like the commonly quoted rounded Harris-Benedict values
    (NOTE(review): confirm against the intended source).

    Args:
        height_cm: Height in centimetres.
        weight_kg: Weight in kilograms.
        age: Age in years.
        gender: 'male' selects the male formula; any other value uses the female formula.
        activity_factor: Multiplier applied to the BMR. Defaults to 1.2 (sedentary),
            which is the value that used to be hard-coded here.

    Returns:
        Estimated daily calories as a float. The activity multiplier is already
        included, so callers must not multiply by an activity factor again.
    """
    if gender == 'male':
        bmr = 66 + (13.7 * weight_kg) + (5 * height_cm) - (6.8 * age)
    else:
        bmr = 655 + (9.6 * weight_kg) + (1.8 * height_cm) - (4.7 * age)
    return bmr * activity_factor

# Convert DataFrame to TensorFlow Dataset (one element per recipe row)
tf_dataset = tf.data.Dataset.from_tensor_slices({
    'recipe_id': df['recipe_id'].values,
    'title': df['Title'].values,
    'ingredients': df['Simplified Ingredients'].values,
    'calories': df['Calories'].values,
    'ingredient_groups': df['Ingredient Groups'].values
})

# Cache the raw rows first, then shuffle and batch.
# Caching *after* shuffle/batch would freeze the first epoch's order, so every later
# epoch would replay identical batches and the retrieval task would always see the
# same in-batch negatives.
tf_dataset = tf_dataset.cache().shuffle(10000).batch(128)

class RecipeModel(tfrs.Model):
    """Retrieval model that embeds a recipe as the mean of its ingredient embeddings.

    Queries and candidates share the same lookup and embedding layers, so both go
    through one code path (`_embed_ingredient_strings`).

    Args:
        ingredient_vocab: Sequence of distinct ingredient strings used as the
            StringLookup vocabulary.
        embedding_dimension: Width of each ingredient embedding vector.
    """

    def __init__(self, ingredient_vocab, embedding_dimension=32):
        super().__init__()

        # Define recipe embedding layers
        self.ingredient_lookup = tf.keras.layers.StringLookup(vocabulary=ingredient_vocab, mask_token=None)
        # Size the table from the lookup layer so it always covers every id the layer
        # can emit (vocabulary plus the out-of-vocabulary bucket).
        self.ingredient_embedding = tf.keras.layers.Embedding(
            self.ingredient_lookup.vocabulary_size(), embedding_dimension
        )

        # Define the task
        # NOTE(review): the metric candidates are the individual vocabulary entries,
        # not whole recipes -- confirm this is the intended evaluation corpus.
        self.task = tfrs.tasks.Retrieval(
            metrics=tfrs.metrics.FactorizedTopK(
                candidates=tf.data.Dataset.from_tensor_slices(ingredient_vocab).batch(128).map(self.compute_candidate_embeddings)
            )
        )

    def _embed_ingredient_strings(self, ingredients):
        """Turn ', '-separated ingredient strings into one embedding per string.

        Args:
            ingredients: String tensor; assumed to be a 1-D batch where each element
                lists ingredients separated by ', ' (TODO confirm with callers).

        Returns:
            The ingredient embeddings averaged over axis 1.
        """
        ingredients_split = tf.strings.split(ingredients, sep=', ')
        ingredient_ids = self.ingredient_lookup(ingredients_split)
        ingredient_embeddings = self.ingredient_embedding(ingredient_ids)
        return tf.reduce_mean(ingredient_embeddings, axis=1)

    def compute_candidate_embeddings(self, ingredients):
        """Embed candidate recipes given their ingredient strings."""
        return self._embed_ingredient_strings(ingredients)

    def compute_query_embeddings(self, ingredients):
        """Embed query ingredient strings (same towers as the candidates)."""
        return self._embed_ingredient_strings(ingredients)

    def call(self, features):
        """Forward pass: accept a feature dict with an 'ingredients' key or a raw string tensor."""
        if isinstance(features, dict):
            return self.compute_query_embeddings(features['ingredients'])
        return self.compute_query_embeddings(features)

    def compute_loss(self, features, training=False):
        """Retrieval loss where each recipe embedding serves as both query and candidate.

        Top-K metrics are only computed outside training: evaluating them on every
        training step is expensive and does not affect the loss.
        """
        query_embeddings = self(features)
        return self.task(
            query_embeddings=query_embeddings,
            candidate_embeddings=query_embeddings,
            compute_metrics=not training,
        )

def _prompt_positive_number(prompt, label, cast=float):
    """Keep asking until the user enters a positive, finite number.

    Args:
        prompt: Text shown to the user.
        label: Quantity name used in the error message (e.g. "Height").
        cast: Callable converting the raw text to a number (float or int).

    Returns:
        The converted value.
    """
    while True:
        try:
            value = cast(input(prompt))
            # Chained comparison so that NaN and infinity are rejected as well
            if not 0 < value < float('inf'):
                raise ValueError(f"{label} must be a positive number.")
            return value
        except ValueError as e:
            print(f"Invalid input: {e}")


def _prompt_choice(prompt, choices, error_message):
    """Keep asking until the stripped, lower-cased answer is one of `choices`."""
    while True:
        answer = input(prompt).strip().lower()
        if answer in choices:
            return answer
        print(error_message)


def _split_comma_list(text, lowercase=True):
    """Split comma-separated text into stripped, non-empty tokens."""
    tokens = (token.strip() for token in text.split(','))
    return [token.lower() if lowercase else token for token in tokens if token]


def _prompt_known_items(prompt, noun, known_items, allow_empty):
    """Keep asking for a comma-separated list until every entry is in `known_items`.

    Args:
        prompt: Text shown to the user.
        noun: Plural noun used in the feedback messages ("allergens"/"ingredients").
        known_items: Set of accepted lower-case names.
        allow_empty: Whether a blank answer is accepted (returns an empty list).

    Returns:
        List of lower-cased, validated entries.
    """
    while True:
        items = _split_comma_list(input(prompt))
        if not items:
            if allow_empty:
                print(f"No {noun} entered.")
                return items
            print(f"Please enter recognized {noun}.")
            continue

        unknown = [item for item in items if item not in known_items]
        if unknown:
            print(f"The following {noun} are not recognized: {', '.join(unknown)}")
            print(f"Please enter recognized {noun}.")
        else:
            print(f"All {noun} are recognized.")
            return items


def get_user_input():
    """Interactively collect the user's profile, allergens, preferences and ingredients.

    Allergens and ingredients are validated against the lower-cased 'Ingredient'
    column of the module-level `ingredients_df`. A blank allergen answer means
    "no allergens"; at least one ingredient is required.

    Returns:
        Tuple of (height_cm, weight_kg, age, gender, activity_factor,
        user_allergens, dietary_preferences, user_ingredients), where
        user_allergens and user_ingredients are ', '-joined strings and
        dietary_preferences is a list of strings.
    """
    height_cm = _prompt_positive_number("Enter your height in cm: ", "Height")
    weight_kg = _prompt_positive_number("Enter your weight in kg: ", "Weight")
    age = _prompt_positive_number("Enter your age: ", "Age", cast=int)

    gender = _prompt_choice(
        "Enter your gender (male/female): ",
        ('male', 'female'),
        "Invalid input: Gender must be 'male' or 'female'.",
    )

    # Activity level name -> multiplier applied to the basal metabolic rate
    activity_levels = {
        'sedentary': 1.2,
        'lightly active': 1.375,
        'moderately active': 1.55,
        'very active': 1.725,
        'extra active': 1.9
    }

    print("Select your activity level from the following options:")
    for level in activity_levels:
        print(f"- {level}")

    activity_level = _prompt_choice(
        "Enter your activity level: ",
        activity_levels,
        "Invalid input: Please select a valid activity level.",
    )
    activity_factor = activity_levels[activity_level]

    # A set gives constant-time membership checks during validation
    known_ingredients = set(ingredients_df['Ingredient'].dropna().str.lower())

    # Blank input is allowed here: it previously became [''] and could never validate,
    # which trapped users who have no allergens in an endless prompt.
    user_allergens = ', '.join(_prompt_known_items(
        "Enter allergens separated by commas (e.g., nuts, gluten): ",
        "allergens",
        known_ingredients,
        allow_empty=True,
    ))

    # Preferences are free text (case preserved); empty entries are dropped
    dietary_preferences = _split_comma_list(
        input("Enter dietary preferences separated by commas (e.g., vegan, vegetarian): "),
        lowercase=False,
    )

    user_ingredients = ', '.join(_prompt_known_items(
        "Enter ingredients you have separated by commas (e.g., chicken, rice): ",
        "ingredients",
        known_ingredients,
        allow_empty=False,
    ))

    return height_cm, weight_kg, age, gender, activity_factor, user_allergens, dietary_preferences, user_ingredients

# Collect the user's profile interactively (this blocks on input() before training starts)
height_cm, weight_kg, age, gender, activity_factor, user_allergens, dietary_preferences, user_ingredients = get_user_input()

# Create vocabulary of ingredients: every distinct ', '-separated token across all
# recipes' 'Simplified Ingredients' strings, sorted for a stable id assignment
ingredient_vocab = sorted(set(', '.join(df['Simplified Ingredients']).split(', ')))

# Instantiate the model
model = RecipeModel(ingredient_vocab)
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

# Train the model
model.fit(tf_dataset, epochs=100)

# Save the model configuration and weights separately
model_config = model.get_config()
model_weights = model.get_weights()

# Save the configuration and weights to disk
with open('recipe_model_config.pkl', 'wb') as f:
    pickle.dump(model_config, f)
with open('recipe_model_weights.pkl', 'wb') as f:
    pickle.dump(model_weights, f)

# Load the configuration and weights from disk
# SECURITY: pickle.load executes arbitrary code from the file; only load files this
# notebook wrote itself, never ones received from elsewhere.
with open('recipe_model_config.pkl', 'rb') as f:
    # NOTE(review): loaded_config is not used below; the model is rebuilt from
    # ingredient_vocab instead.
    loaded_config = pickle.load(f)
with open('recipe_model_weights.pkl', 'rb') as f:
    loaded_weights = pickle.load(f)

# Reconstruct the model
reconstructed_model = RecipeModel(ingredient_vocab)
sample_input = {'ingredients': tf.constant(["beef"])}
# Run one forward pass before set_weights -- presumably so the layers' variables
# exist and can receive the loaded values (confirm).
reconstructed_model(sample_input)
reconstructed_model.set_weights(loaded_weights)
# NOTE(review): learning_rate here (0.5) differs from the one used for training (0.1);
# the reconstructed model is not trained in the code below.
reconstructed_model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.5))

# Brute-force top-K layer; incoming queries are embedded by the reconstructed model
index = tfrs.layers.factorized_top_k.BruteForce(reconstructed_model)

# Pair every recipe id with the embedding of its ingredient string, 128 recipes per batch
candidate_embeddings = (
    tf.data.Dataset.from_tensor_slices(dict(
        recipe_id=df['recipe_id'].values,
        title=df['Title'].values,
        ingredients=df['Simplified Ingredients'].values,
    ))
    .batch(128)
    .map(lambda batch: (
        batch['recipe_id'],
        reconstructed_model.compute_candidate_embeddings(batch['ingredients']),
    ))
)

# Populate the index with the (id, embedding) pairs
index.index_from_dataset(candidate_embeddings)

# Get recommendations
example_recipe = [user_ingredients]

# calculate_calories() called with these four arguments already multiplies the BMR by
# the sedentary factor (1.2). Divide that out before applying the user's own activity
# factor, otherwise the multiplier is applied twice (e.g. 1.2 * 1.2 for "sedentary").
user_calories = calculate_calories(height_cm, weight_kg, age, gender) / 1.2 * activity_factor

# Per-meal calorie window: +/-10% of the daily need, split over four meals
min_calories = user_calories * 0.9 / 4
max_calories = user_calories * 1.1 / 4

# user_allergens is one ', '-joined string; split it back into individual names so
# each allergen is matched on its own (wrapping the whole string in a list only ever
# matched when exactly one allergen was entered).
excluded_ingredients = [allergen for allergen in user_allergens.split(', ') if allergen]

# Filter
filtered_df = df[(df['Calories'] >= min_calories) & (df['Calories'] <= max_calories)]

def contains_excluded_ingredient(ingredients, excluded_ingredients):
    """Tell whether a recipe lists any excluded ingredient.

    Args:
        ingredients: Recipe ingredients as one ', '-separated string.
        excluded_ingredients: Iterable of ingredient names to avoid.

    Returns:
        True if at least one ', '-separated token of `ingredients` equals an
        entry of `excluded_ingredients`, otherwise False.
    """
    banned = frozenset(excluded_ingredients)
    return any(token in banned for token in ingredients.split(', '))

# Drop every recipe that lists one of the excluded ingredients
filtered_df = filtered_df[~filtered_df['Simplified Ingredients'].apply(lambda x: contains_excluded_ingredient(x, excluded_ingredients))]

# Indexing an empty dataset fails deep inside TensorFlow with an unhelpful message,
# so stop here with a clear explanation instead.
if filtered_df.empty:
    raise ValueError("No recipes match the calorie range and allergen restrictions.")

# Re-index using only the recipes that survived the filters
filtered_tf_dataset = tf.data.Dataset.from_tensor_slices({
    'recipe_id': filtered_df['recipe_id'].values,
    'title': filtered_df['Title'].values,
    'ingredients': filtered_df['Simplified Ingredients'].values,
    'ingredient_groups': filtered_df['Ingredient Groups'].values
}).batch(128).map(lambda x: (x['recipe_id'], reconstructed_model.compute_candidate_embeddings(x['ingredients'])))

index.index_from_dataset(filtered_tf_dataset)

example_recipe_tensor = tf.constant(example_recipe)
# The index cannot return more results than it holds, so cap k at the candidate count
scores, top_k = index(example_recipe_tensor, k=min(4, len(filtered_df)))

# Report each recommended recipe id together with its details from filtered_df
print("Top recommendations:")
for i in range(len(top_k[0])):
    recipe_id = top_k[0][i].numpy()
    print(f"Recommended recipe ID: {recipe_id}")
    matching_recipes = filtered_df[filtered_df['recipe_id'] == recipe_id]

    if not matching_recipes.empty:
        title = matching_recipes.Title.values[0]
        ingredients = matching_recipes['Simplified Ingredients'].values[0]
        ingredient_groups = matching_recipes['Ingredient Groups'].values[0]
        calories = matching_recipes['Calories'].values[0]
        # "\nCalories" (the original "\Calories" was an invalid escape that printed a
        # literal backslash and kept the calories on the ingredient-groups line)
        print(f"Title: {title}\nIngredients: {ingredients}\nIngredient Groups: {ingredient_groups}\nCalories: {calories}\n")
    else:
        print(f"Tidak ada resep yang cocok untuk ID resep: {recipe_id}\n")


Enter your height in cm: 178
Enter your weight in kg: 78
Enter your age: 21
Enter your gender (male/female): male
Select your activity level from the following options:
- sedentary
- lightly active
- moderately active
- very active
- extra active
Enter your activity level: sedentary
Enter allergens separated by commas (e.g., nuts, gluten): beef
All allergens are recognized.
Enter dietary preferences separated by commas (e.g., vegan, vegetarian): 
Enter ingredients you have separated by commas (e.g., chicken, rice): milk
All ingredients are recognized.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/