In [4]:
import torch
import pandas as pd
import numpy as np

In [5]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("irkaal/foodcom-recipes-and-reviews")


print("Path to dataset files:", path)

Path to dataset files: /home/jamiros/.cache/kagglehub/datasets/irkaal/foodcom-recipes-and-reviews/versions/2


In [6]:
data_recipes = pd.read_csv(filepath_or_buffer="../Datasets/Food Values/recipes.csv")
data_reviews = pd.read_csv(filepath_or_buffer="../Datasets/Food Values/reviews.csv")

In [7]:
print(data_recipes.isnull().sum())

RecipeId                           0
Name                               0
AuthorId                           0
AuthorName                         0
CookTime                       82545
PrepTime                           0
TotalTime                          0
DatePublished                      0
Description                        5
Images                             1
RecipeCategory                   751
Keywords                       17237
RecipeIngredientQuantities         3
RecipeIngredientParts              0
AggregatedRating              253223
ReviewCount                   247489
Calories                           0
FatContent                         0
SaturatedFatContent                0
CholesterolContent                 0
SodiumContent                      0
CarbohydrateContent                0
FiberContent                       0
SugarContent                       0
ProteinContent                     0
RecipeServings                182911
RecipeYield                   348071
R

Replace null data with 0, since some foods may not have that macro-nutrient

In [8]:
data_recipes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522517 entries, 0 to 522516
Data columns (total 28 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   RecipeId                    522517 non-null  int64  
 1   Name                        522517 non-null  object 
 2   AuthorId                    522517 non-null  int64  
 3   AuthorName                  522517 non-null  object 
 4   CookTime                    439972 non-null  object 
 5   PrepTime                    522517 non-null  object 
 6   TotalTime                   522517 non-null  object 
 7   DatePublished               522517 non-null  object 
 8   Description                 522512 non-null  object 
 9   Images                      522516 non-null  object 
 10  RecipeCategory              521766 non-null  object 
 11  Keywords                    505280 non-null  object 
 12  RecipeIngredientQuantities  522514 non-null  object 
 13  RecipeIngredie

Remove all the useless columns


In [17]:
selected_columns = ['Name','Calories', 'FatContent', 'SaturatedFatContent','CholesterolContent', 'SodiumContent', 'CarbohydrateContent','FiberContent', 'SugarContent', 'ProteinContent','RecipeInstructions']

filtered_data_recipes = data_recipes[selected_columns]
filtered_data_recipes = filtered_data_recipes.drop_duplicates()


In [18]:
filtered_data_recipes.info()

<class 'pandas.core.frame.DataFrame'>
Index: 522022 entries, 0 to 522516
Data columns (total 11 columns):
 #   Column               Non-Null Count   Dtype  
---  ------               --------------   -----  
 0   Name                 522022 non-null  object 
 1   Calories             522022 non-null  float64
 2   FatContent           522022 non-null  float64
 3   SaturatedFatContent  522022 non-null  float64
 4   CholesterolContent   522022 non-null  float64
 5   SodiumContent        522022 non-null  float64
 6   CarbohydrateContent  522022 non-null  float64
 7   FiberContent         522022 non-null  float64
 8   SugarContent         522022 non-null  float64
 9   ProteinContent       522022 non-null  float64
 10  RecipeInstructions   522022 non-null  object 
dtypes: float64(9), object(2)
memory usage: 47.8+ MB


In [19]:
from ydata_profiling import ProfileReport
design_report = ProfileReport(filtered_data_recipes)


In [20]:
design_report.to_notebook_iframe()

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

In [23]:
import re
import string

from sklearn.metrics.pairwise import cosine_similarity


# Preprocessing function to clean the text
def preprocess_text(text):
    # Remove special characters using regular expressions
    text = re.sub(r'[@#$%^&*!`~]', '', text)  # Removes specific special characters
    # Optionally, you can add additional text preprocessing steps like lowercasing, removing extra spaces, etc.
    text = text.lower()  # Convert to lowercase
    text = re.sub(r'\s+', ' ', text)  # Replace multiple spaces with a single space
    text = text.strip()  # Remove leading and trailing whitespaces
    return text

def clean_instructions(text):
    # Remove the leading 'c' and quotes
    text = text.lstrip('c("').rstrip('",)')
    # Remove commas
    text = text.replace(',', '')
    # Remove any remaining special characters if needed
    text = text.translate(str.maketrans("", "", string.punctuation))
    return text

filtered_data_recipes['Name'] = filtered_data_recipes['Name'].apply(preprocess_text)
filtered_data_recipes['RecipeInstructions'] = filtered_data_recipes['RecipeInstructions'].apply(clean_instructions)







In [66]:
# Predefined User Input (You can modify these values as needed)
user_profile = {}
user_profile["age"] = 25  # Example: 25 years old
user_profile["sex"] = "male"  # Example: female
user_profile["weight"] = 50   # Example: 60 kg
user_profile["height"] = 165  # Example: 165 cm
user_profile["activity_level"] = "sedentary"  # Example: moderate activity
user_profile["goal"] = ""  # Example: weight loss
user_profile["filtered_data_recipes_preference"] = "mixed"  # Example: vegetarian
user_profile["allergies"] = ["bird"]  # Example: allergies to nuts

In [67]:
# Step 1: Calculate BMR using Mifflin-St Jeor Formula
def calculate_bmr(weight, height, age, sex):
    if sex == "male":
        return (9.99 * weight) + (6.25 * height) - (4.92 * age) + 5
    else:
        return (9.99 * weight) + (6.25 * height) - (4.92 * age) - 161


In [70]:
# Step 2: Calculate TDEE (Total Daily Energy Expenditure) based on updated activity levels
def calculate_tdee(bmr, activity_level):
    activity_multipliers = {
        "sedentary": 1.2,
        "lightly_active": 1.375,
        "moderate": 1.55,
        "very_active": 1.725,
        "extra_active": 1.9
    }
    return bmr * activity_multipliers.get(activity_level, 1.2)


In [71]:

# Step 3: Adjust Calories Based on Goal
def adjust_calories(tdee, goal):
    if goal == "weight_loss":
        return tdee - 500  # 500 kcal deficit
    elif goal == "weight_gain":
        return tdee + 500  # 500 kcal surplus
    else:
        return tdee  # Maintenance



In [72]:
# Step 4: Macronutrient Distribution
def calculate_macros(calories, goal):
    if goal == "weight_loss":
        protein_ratio, fat_ratio, carb_ratio = 0.3, 0.3, 0.4  # Balanced for weight loss
    elif goal == "weight_gain":
        protein_ratio, fat_ratio, carb_ratio = 0.25, 0.3, 0.45  # Higher carbs for weight gain
    else:
        protein_ratio, fat_ratio, carb_ratio = 0.3, 0.3, 0.4  # Balanced for maintenance
    
    protein = (calories * protein_ratio) / 4  # 1g protein = 4 kcal
    fat = (calories * fat_ratio) / 9  # 1g fat = 9 kcal
    carbs = (calories * carb_ratio) / 4  # 1g carb = 4 kcal
    return protein, fat, carbs


In [78]:

# Step 5: Compute User Profile Nutritional Needs
bmr = calculate_bmr(user_profile["weight"], user_profile["height"], user_profile["age"], user_profile["sex"])
tdee = calculate_tdee(bmr, user_profile["activity_level"])
target_calories = adjust_calories(tdee, user_profile["goal"])
target_protein, target_fat, target_carbs = calculate_macros(target_calories, user_profile["goal"])




In [79]:
# Step 6: Filter Recipes Based on filtered_data_recipes Preferences & Allergies
filtered_filtered_data_recipes = filtered_data_recipes.copy()
if user_profile["filtered_data_recipes_preference"] in ["vegetarian", "vegan"]:
    filtered_filtered_data_recipes = filtered_filtered_data_recipes[filtered_filtered_data_recipes["RecipeInstructions"].str
                                                                    .contains(user_profile["filtered_data_recipes_preference"], case=False, na=False)]

for allergen in user_profile["allergies"]:
    filtered_filtered_data_recipes = filtered_filtered_data_recipes[~filtered_filtered_data_recipes["RecipeInstructions"].str
                                                                    .contains(allergen, case=False, na=False)]


In [80]:

# Step 7: Content-Based Recommendation Using Cosine Similarity
def recommend_meals(user_calories, user_fat, user_carbs, user_protein, top_n=7):
    # User profile vector includes all 9 features, and we fill missing features with zeros (or use other defaults)
    user_profile_vector = np.array([[user_calories, user_fat, user_carbs, user_protein, 0, 0, 0, 0, 0]])
    
    # Compute Cosine Similarity
    similarities = cosine_similarity(user_profile_vector, filtered_filtered_data_recipes.iloc[:, 1:10].values)  # Adjusting for 9 features
    
    # Rank meals based on similarity
    filtered_filtered_data_recipes["Similarity"] = similarities[0]
    
    top_recommendations = filtered_filtered_data_recipes.sort_values(by="Similarity", ascending=False).head(top_n)
   
    # Return recommendations in the desired format
    recommendations = []
    for _, row in top_recommendations.iterrows():
        recommendation = {
            "Name": row["Name"],
            "Calories": row["Calories"],
            "ProteinContent": row["ProteinContent"],
            "FatContent": row["FatContent"],
            "CarbohydrateContent": row["CarbohydrateContent"],
            "SodiumContent": row["SodiumContent"],
            "RecipeInstructions": row["RecipeInstructions"]
        }
        recommendations.append(recommendation)
    
    return recommendations


In [81]:

# Step 8: Get Recommendations
recommendations = recommend_meals(target_calories, target_fat, target_carbs, target_protein, top_n=7)
print(f"Target Calories: {target_calories}")
print(f"Target Fat: {target_fat}")
print(f"Target Carbs: {target_carbs}" )
print(f"Target Protein: {target_protein}")
print("\n\n")


# Print the recommendations in the desired format
for rec in recommendations:
    print(f"Name: {rec['Name']}")
    print(f"Calories: {rec['Calories']}")
    print(f"ProteinContent: {rec['ProteinContent']}")
    print(f"FatContent: {rec['FatContent']}")
    print(f"CarbohydrateContent: {rec['CarbohydrateContent']}")
    print(f"SodiumContent: {rec['SodiumContent']}")
    print(f"RecipeInstructions: {rec['RecipeInstructions']}")
    print("\n" + "-"*50 + "\n")

Target Calories: 1695.3
Target Fat: 56.51
Target Carbs: 169.53
Target Protein: 127.1475



Name: bird suet -- for the birds
Calories: 1570.6
ProteinContent: 6.4
FatContent: 166.2
CarbohydrateContent: 13.1
SodiumContent: 13.3
RecipeInstructions: Melt the suet available from your butcher over low heat In a mixing bowl mix the remaining ingredients together Allow the suet to cool somewhat and then pour in the mix and stir thoroughly When it thickens cools stuff the mixture in between the petals of an open pine cone Virginia pine cones are best because they open very nicely and can be picked up all over the eastern US but any open pine cone will work The big pine cones from the western US forests are great too but youll only need a couple of those ones 
Hang the pine cones from a string in your back yard As for the other ingredients your local farmfeed store can usually supply ingredients such as millet This recipe is a good project for kids on rainy days

---------------------------------