In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/food-com-recipes-and-user-interactions/RAW_interactions.csv
/kaggle/input/food-com-recipes-and-user-interactions/ingr_map.pkl
/kaggle/input/food-com-recipes-and-user-interactions/PP_recipes.csv
/kaggle/input/food-com-recipes-and-user-interactions/RAW_recipes.csv
/kaggle/input/food-com-recipes-and-user-interactions/interactions_train.csv
/kaggle/input/food-com-recipes-and-user-interactions/interactions_test.csv
/kaggle/input/food-com-recipes-and-user-interactions/PP_users.csv
/kaggle/input/food-com-recipes-and-user-interactions/interactions_validation.csv


In [2]:
import pandas as pd
from collections import Counter
import ast
import re

# Function to safely evaluate the tags string to a list
def safe_eval_tags(tags_string):
    try:
        # Remove any extra brackets and split the string
        cleaned_string = re.sub(r'\]\[', ', ', tags_string.strip('[]'))
        return ast.literal_eval(f'[{cleaned_string}]')
    except:
        return []

# Simulated function to mimic reading from a dataset with limited rows
def read_limited_csv(file_path, max_rows=10000, chunksize=1000):
    rows_read = 0
    for chunk in pd.read_csv(file_path, chunksize=chunksize):
        if rows_read + len(chunk) > max_rows:
            yield chunk.iloc[:max_rows - rows_read]
            break
        yield chunk
        rows_read += len(chunk)

# Initialize a global tag counter
global_tag_counter = Counter()

# Process the data in chunks
chunk_number = 0
max_rows = 10000
for chunk in read_limited_csv('/kaggle/input/food-com-recipes-and-user-interactions/RAW_recipes.csv', max_rows=max_rows):
    chunk_number += 1

    # Initialize a tag counter for this chunk
    chunk_tag_counter = Counter()

    # Process tags in this chunk
    for tags in chunk['tags']:
        tag_list = safe_eval_tags(tags)
        chunk_tag_counter.update(tag_list)

    # Update global counter
    global_tag_counter.update(chunk_tag_counter)

    # Print chunk-level results
    print(f"\nChunk {chunk_number}:")
    print(f"Total Recipes in Chunk: {len(chunk)}")
    print("Top 5 Tags in Chunk:")
    for tag, count in chunk_tag_counter.most_common(5):
        print(f"{tag}: {count}")

# Get the total number of recipes processed
total_recipes = max_rows

# Print global results
print("\nGlobal Tag Analysis:")
print(f"Total unique tags: {len(global_tag_counter)}")
print("Top 20 tags across all processed rows:")
for tag, count in global_tag_counter.most_common(20):
    percentage = (count / total_recipes) * 100
    print(f"{tag}: {count} ({percentage:.2f}%)")

# Least common tags
print("\nBottom 20 least common tags:")
for tag, count in global_tag_counter.most_common()[-20:]:
    percentage = (count / total_recipes) * 100
    print(f"{tag}: {count} ({percentage:.2f}%)")

# Tags appearing in more than 10% of recipes
common_tags = [tag for tag, count in global_tag_counter.items() if (count / total_recipes) > 0.1]
print(f"\nTags that appear in more than 10% of recipes: {common_tags}")

# Tags appearing in less than 1% of recipes
rare_tags = [tag for tag, count in global_tag_counter.items() if (count / total_recipes) < 0.01]
print(f"\nNumber of tags that appear in less than 1% of recipes: {len(rare_tags)}")



Chunk 1:
Total Recipes in Chunk: 1000
Top 5 Tags in Chunk:
preparation: 993
time-to-make: 965
course: 961
dietary: 711
main-ingredient: 674

Chunk 2:
Total Recipes in Chunk: 1000
Top 5 Tags in Chunk:
preparation: 991
time-to-make: 954
course: 950
dietary: 709
main-ingredient: 676

Chunk 3:
Total Recipes in Chunk: 1000
Top 5 Tags in Chunk:
preparation: 994
time-to-make: 970
course: 931
dietary: 723
main-ingredient: 696

Chunk 4:
Total Recipes in Chunk: 1000
Top 5 Tags in Chunk:
preparation: 998
time-to-make: 981
course: 953
dietary: 692
main-ingredient: 643

Chunk 5:
Total Recipes in Chunk: 1000
Top 5 Tags in Chunk:
preparation: 996
time-to-make: 975
course: 954
main-ingredient: 679
dietary: 671

Chunk 6:
Total Recipes in Chunk: 1000
Top 5 Tags in Chunk:
preparation: 995
time-to-make: 982
course: 934
dietary: 719
main-ingredient: 648

Chunk 7:
Total Recipes in Chunk: 1000
Top 5 Tags in Chunk:
preparation: 999
time-to-make: 983
course: 973
main-ingredient: 856
fruit: 769

Chunk 8:
Total

In [3]:
import re

# Refine the filtering logic to exclude non-time-related tags
def is_time_related(tag):
    # Match tags that explicitly include time units like minutes, hours, or specific patterns like "1-day-or-more"
    time_patterns = [
        r"\b\d+-minutes?\b",  # Matches "15-minutes", "30-minutes", etc.
        r"\b\d+-hours?\b",    # Matches "4-hours", "1-hour", etc.
        r"\b\d+-day(?:s)?-or-more\b",  # Matches "1-day-or-more"
        r"\b\d+-minutes-or-less\b",  # Matches "30-minutes-or-less", "15-minutes-or-less", etc.
        r"\b\d+-hours-or-less\b",    # Matches "4-hours-or-less"
    ]
    return any(re.search(pattern, tag) for pattern in time_patterns)

# Apply the refined logic
time_related_tags = {tag: count for tag, count in global_tag_counter.items() if is_time_related(tag)}

# Print the refined time-related tags
print("Refined Time-related tags:")
for tag, count in sorted(time_related_tags.items(), key=lambda x: x[1], reverse=True):
    print(f"{tag}: {count}")

# Display the top 10 most common refined time-related tags
print("\nTop 10 refined most common time-related tags:")
for tag, count in sorted(time_related_tags.items(), key=lambda x: x[1], reverse=True)[:10]:
    print(f"{tag}: {count}")


Refined Time-related tags:
60-minutes-or-less: 2913
30-minutes-or-less: 2195
4-hours-or-less: 2195
15-minutes-or-less: 2172
1-day-or-more: 113

Top 10 refined most common time-related tags:
60-minutes-or-less: 2913
30-minutes-or-less: 2195
4-hours-or-less: 2195
15-minutes-or-less: 2172
1-day-or-more: 113


In [4]:
# # Categorize tags (this is a manual process, here's a start)
# time_of_day_tags = {'breakfast', 'brunch', 'lunch', 'dinner', 'snack'}
# ingredient_tags = {'fruits', 'vegetables', 'meat', 'poultry', 'seafood', 'dairy'}
# preparation_tags = {'easy', 'quick', '15-minutes-or-less', '30-minutes-or-less', '60-minutes-or-less'}
# occasion_tags = {'christmas', 'thanksgiving', 'halloween', 'easter', 'holiday', 'summer', 'winter', 'fall', 'spring'}

# # Function to check if a recipe has tags from a specific category
# def has_tag_from_category(tags, category_tags):
#     return bool(set(eval(tags)) & category_tags)

# # Add boolean columns for each category
# df['is_breakfast'] = df['tags'].apply(lambda x: has_tag_from_category(x, {'breakfast'}))
# df['is_lunch'] = df['tags'].apply(lambda x: has_tag_from_category(x, {'lunch'}))
# df['is_dinner'] = df['tags'].apply(lambda x: has_tag_from_category(x, {'dinner'}))
# df['is_easy'] = df['tags'].apply(lambda x: has_tag_from_category(x, {'easy'}))
# df['is_quick'] = df['tags'].apply(lambda x: has_tag_from_category(x, preparation_tags))
# df['is_holiday'] = df['tags'].apply(lambda x: has_tag_from_category(x, occasion_tags))
# df['has_fruits'] = df['tags'].apply(lambda x: has_tag_from_category(x, {'fruits'}))

# print(df[['name', 'is_breakfast', 'is_lunch', 'is_dinner', 'is_easy', 'is_quick', 'is_holiday', 'has_fruits']].head())

## Aim is to utilize the tags for content based similarity based on tags maybe levraging a little bit of NLP towards the end
- first goal : get results on RAW data
- second goal : pre-process data group them into more better / informative tags

In [5]:
import pandas as pd
import ast
import re
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Define a function to extract time-related tags
def extract_time_tags(tags):
    time_patterns = [
        r"\b\d+-minutes?\b",         # Matches "15-minutes", "30-minutes", etc.
        r"\b\d+-hours?\b",           # Matches "4-hours", "1-hour", etc.
        r"\b\d+-day(?:s)?-or-more\b",  # Matches "1-day-or-more"
        r"\b\d+-minutes-or-less\b",  # Matches "30-minutes-or-less", "15-minutes-or-less", etc.
        r"\b\d+-hours-or-less\b"     # Matches "4-hours-or-less"
    ]
    try:
        # Convert string to list
        tag_list = ast.literal_eval(tags)
        # Filter tags matching the time patterns
        return [tag for tag in tag_list if any(re.search(pattern, tag) for pattern in time_patterns)]
    except:
        return []

# Read a sample of the dataset
file_path = '/kaggle/input/food-com-recipes-and-user-interactions/RAW_recipes.csv'
chunk_size = 5000  # Define chunk size for processing
sample_frac = 0.1  # Fraction of data to sample

# Process the dataset in chunks to sample
dfs = []
for chunk in pd.read_csv(file_path, chunksize=chunk_size):
    sampled_chunk = chunk.sample(frac=sample_frac, random_state=42)
    dfs.append(sampled_chunk)

# Combine sampled chunks
recipes_df = pd.concat(dfs)

# Extract time-related tags into a new column
recipes_df['time_tags'] = recipes_df['tags'].apply(extract_time_tags)

# Drop rows with no time tags
recipes_df = recipes_df[recipes_df['time_tags'].apply(len) > 0]

# Convert time_tags to a string format for vectorization
recipes_df['time_tags_str'] = recipes_df['time_tags'].apply(lambda tags: ' '.join(tags))

# Use CountVectorizer to create a binary matrix for the time tags
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(' '))
time_tags_matrix = vectorizer.fit_transform(recipes_df['time_tags_str'])

# Compute cosine similarity on the matrix
similarity_matrix = cosine_similarity(time_tags_matrix)

# Convert the similarity matrix to a DataFrame for easy lookup
similarity_df = pd.DataFrame(similarity_matrix, index=recipes_df.index, columns=recipes_df.index)

# Example: Find top 3 similar recipes for a given recipe index
def get_similar_recipes(recipe_index, top_n=3):
    # Sort recipes by similarity score
    similar_scores = similarity_df[recipe_index].sort_values(ascending=False)
    # Exclude the recipe itself and return top N recommendations
    similar_recipes = similar_scores.drop(recipe_index).head(top_n)
    return similar_recipes

# Test the recommendation system with a sample recipe
recipe_index = recipes_df.index[0]  # Select the first recipe index
recommended_recipes = get_similar_recipes(recipe_index, top_n=3)

# Print recommendations
print(f"Top {len(recommended_recipes)} similar recipes for Recipe {recipe_index} based on time tags:")
print(recommended_recipes)

# Save the filtered and sampled dataframe for future use
recipes_df.to_csv('sampled_recipes_with_time_tags.csv', index=False)




Top 3 similar recipes for Recipe 1501 based on time tags:
110177    1.0
114392    1.0
112509    1.0
Name: 1501, dtype: float64


In [6]:
# Access specific rows by index
rows_of_interest = recipes_df.loc[[110177, 114392, 112509]]
rows_of_interest


Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients,time_tags,time_tags_str
110177,i can t wait for chole,25632,35,37636,2002-04-18,"['60-minutes-or-less', 'time-to-make', 'course...","[300.0, 4.0, 16.0, 26.0, 24.0, 1.0, 19.0]",11,"['in large saucepan over medium heat , heat cu...","a nice spicy indian chickpea recipe. i know, i...","['onions', 'fresh ginger', 'garlic', 'cumin se...",14,[60-minutes-or-less],60-minutes-or-less
114392,japanese crispy fried chicken kara age,494787,40,2678032,2013-02-05,"['lactose', '60-minutes-or-less', 'time-to-mak...","[397.4, 26.0, 6.0, 35.0, 47.0, 24.0, 11.0]",7,"['cut chicken in bite size pieces', 'in a plas...",this is a classic japanese kara-age recipe. ch...,"['boneless chicken thighs', 'ginger', 'clove',...",8,[60-minutes-or-less],60-minutes-or-less
112509,italian christmas cookies filled,420915,60,1559190,2010-04-19,"['60-minutes-or-less', 'time-to-make', 'course...","[200.6, 13.0, 39.0, 0.0, 4.0, 9.0, 9.0]",12,"['sift together flours and salt', 'cut shorten...",these are cookies my grandmother used to make ...,"['pastry flour', 'flour', 'salt', 'shortening'...",10,[60-minutes-or-less],60-minutes-or-less


In [7]:
# recipes_df.iloc[0]

In [8]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast

# Function to tokenize ingredients
def tokenize_ingredients(ingredients):
    # Flatten ingredient list into a single string
    ingredient_str = ' '.join(ingredients).lower()
    return ingredient_str.split()  # Tokenize into words (no stopword removal)

# Apply tokenization to the ingredients column
recipes_df['processed_ingredients'] = recipes_df['ingredients'].apply(
    lambda x: tokenize_ingredients(ast.literal_eval(x))
)

# Combine time_tags and processed_ingredients
def combine_features(row):
    time_tags_str = ' '.join(row['time_tags'])
    ingredients_str = ' '.join(row['processed_ingredients'])
    return f"{time_tags_str} {ingredients_str}"

recipes_df['combined_features'] = recipes_df.apply(combine_features, axis=1)

# Use CountVectorizer to vectorize the combined_features column
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(' '))
combined_features_matrix = vectorizer.fit_transform(recipes_df['combined_features'])

# Compute cosine similarity on the combined feature matrix
combined_similarity_matrix = cosine_similarity(combined_features_matrix)

# Convert the similarity matrix to a DataFrame for easy lookup
combined_similarity_df = pd.DataFrame(combined_similarity_matrix, index=recipes_df.index, columns=recipes_df.index)

# Function to get top N similar recipes
def get_similar_recipes(recipe_index, top_n=25):
    # Sort recipes by similarity score
    similar_scores = combined_similarity_df[recipe_index].sort_values(ascending=False)
    # Exclude the recipe itself and return top N recommendations
    similar_recipes = similar_scores.drop(recipe_index).head(top_n)
    return similar_recipes

# Test the recommendation system with a sample recipe
recipe_index = recipes_df.index[0]  # Replace with your recipe ID
recommended_recipes = get_similar_recipes(recipe_index, top_n=25)

# Print recommendations
print(f"Top {len(recommended_recipes)} similar recipes for Recipe {recipes_df.iloc[recipe_index]['name']} (Index: {recipe_index}):")
print(recommended_recipes)

# Access details of recommended recipes
print("\nDetails of recommended recipes:")
print(recipes_df.loc[recommended_recipes.index, ['name', 'ingredients', 'time_tags']])





Top 25 similar recipes for Recipe baked shrimp toasts (Index: 1501):
13907     0.782624
139758    0.737865
173899    0.717137
123625    0.711512
128456    0.700000
94923     0.700000
226020    0.700000
28106     0.700000
110691    0.676123
28957     0.676123
151907    0.676123
143181    0.670820
187417    0.667424
48673     0.667424
52225     0.667424
22232     0.667424
102907    0.667424
67483     0.667424
69415     0.667424
212594    0.667424
131973    0.653197
22413     0.653197
199771    0.652929
201647    0.652929
220468    0.652929
Name: 1501, dtype: float64

Details of recommended recipes:
                                                   name  \
13907                 baked cinnamon sugar french toast   
139758          mother s one hour rolls  mors tvebakker   
173899                               rich baked custard   
123625                               light brioche buns   
128456                                        magic pie   
94923                        grandma chin 

In [9]:
recipes_df['combined_features']

1501      60-minutes-or-less dry yeast milk butter eggs ...
2586      60-minutes-or-less cooked chicken breast dry m...
2653      4-hours-or-less all-purpose flour whole wheat ...
1055      15-minutes-or-less desiccated coconut sweetene...
705       15-minutes-or-less cranberry juice cocktail un...
                                ...                        
231022    4-hours-or-less eggs unsweetened applesauce va...
230168    15-minutes-or-less light chunk tuna in water b...
230680    60-minutes-or-less tilapia fillets fresh coria...
231182    60-minutes-or-less butter zucchini fresh tarra...
230602    60-minutes-or-less hot pepper sauce green pepp...
Name: combined_features, Length: 22021, dtype: object

### Let's remove Time - Tags and see what the next most occuring domain

In [10]:
from collections import Counter
import ast

# Flatten all tags into a single list
all_tags = []
recipes_df['tags'].apply(lambda x: all_tags.extend(ast.literal_eval(x)))

# Count the occurrences of each tag
tag_counter = Counter(all_tags)

# Sort tags by frequency
sorted_tags = tag_counter.most_common()

# Filter out time-related tags
filtered_tags = [(tag, count) for tag, count in sorted_tags if tag not in time_related_tags]

# # Display the filtered tags descending order
# print("Top 200 most common tags (excluding time-related tags):")
# for tag, count in filtered_tags[:200]:
#     print(f"{tag}: {count}")

# # Display the top 100 filtered tags
# print("Top 100 most common tags (excluding time-related tags):")
# for tag, count in filtered_tags[:100]:
#     print(f"{tag}: {count}")

# Total unique tags after filtering
print(f"\nTotal unique tags (excluding time-related tags): {len(filtered_tags)}")




Total unique tags (excluding time-related tags): 480


In [11]:
def preprocess_data(df):
    df[['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']] = df['nutrition'].str.split(",", expand=True)
    df['calories'] = df['calories'].apply(lambda x: x.replace('[',''))
    df['carbohydrates (PDV)'] = df['carbohydrates (PDV)'].apply(lambda x: x.replace(']',''))
    df[['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']] = df[['calories', 'total fat (PDV)', 'sugar (PDV)', 'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)', 'carbohydrates (PDV)']].astype('float')
    return(df)
Nutritional_df = preprocess_data(recipes_df)
#recipes_df.rename(columns={"name": "Recipe_name", "id": "Recipe_ID", "contributor_id": "User_ID"}, inplace=True)

recipes_df.head(2)

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,time_tags_str,processed_ingredients,combined_features,calories,total fat (PDV),sugar (PDV),sodium (PDV),protein (PDV),saturated fat (PDV),carbohydrates (PDV)
1501,49er flapjacks,400885,35,1396198,2009-11-23,"['60-minutes-or-less', 'time-to-make', 'course...","[287.6, 24.0, 29.0, 27.0, 24.0, 42.0, 8.0]",18,"['the night before your pancake meal , in smal...",almost a perfect reproduction of the original ...,...,60-minutes-or-less,"[dry, yeast, milk, butter, eggs, sugar, bread,...",60-minutes-or-less dry yeast milk butter eggs ...,287.6,24.0,29.0,27.0,24.0,42.0,8.0
2586,adobo chicken pie,296649,55,704950,2008-04-05,"['60-minutes-or-less', 'time-to-make', 'course...","[271.8, 14.0, 23.0, 11.0, 26.0, 15.0, 12.0]",15,"['preheat oven to 400 degrees f', 'in a large ...",i love that chicken tamale pie at trader joe's...,...,60-minutes-or-less,"[cooked, chicken, breast, dry, mustard, flour,...",60-minutes-or-less cooked chicken breast dry m...,271.8,14.0,23.0,11.0,26.0,15.0,12.0


In [12]:
recipes_df['High Protein'] = (recipes_df['protein (PDV)'] > 20).astype(int)
recipes_df['Very High Protein'] = (recipes_df['protein (PDV)'] > 30).astype(int)
recipes_df['Low Sugar'] = (recipes_df['sugar (PDV)'] < 5).astype(int)
recipes_df['Low Fat'] = (recipes_df['total fat (PDV)'] < 10).astype(int)
recipes_df['Low Calorie'] = (recipes_df['calories'] < 200).astype(int)
recipes_df['High Calorie'] = (recipes_df['calories'] > 300).astype(int)
recipes_df['High Fat'] = (recipes_df['total fat (PDV)'] > 15).astype(int)
recipes_df['Moderate Sodium'] = ((recipes_df['sodium (PDV)'] >= 10) & (recipes_df['sodium (PDV)'] <= 15)).astype(int)
recipes_df['Low Sodium'] = (recipes_df['sodium (PDV)'] < 5).astype(int)
recipes_df['Weight Loss'] = (recipes_df['Low Sugar'] + recipes_df['Low Calorie'] + recipes_df['Low Fat'] >= 2).astype(int)
recipes_df['Weight Gain'] = (recipes_df['High Calorie'] + recipes_df['High Protein'] >= 2).astype(int)
recipes_df['Muscle Building'] = (recipes_df['High Protein'] + recipes_df['High Fat'] >= 2).astype(int)
recipes_df['Blood Pressure Management'] = (recipes_df['Low Sodium'] + recipes_df['Moderate Sodium'] >= 1).astype(int)


In [13]:
recipes_df.head(5)

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,Low Fat,Low Calorie,High Calorie,High Fat,Moderate Sodium,Low Sodium,Weight Loss,Weight Gain,Muscle Building,Blood Pressure Management
1501,49er flapjacks,400885,35,1396198,2009-11-23,"['60-minutes-or-less', 'time-to-make', 'course...","[287.6, 24.0, 29.0, 27.0, 24.0, 42.0, 8.0]",18,"['the night before your pancake meal , in smal...",almost a perfect reproduction of the original ...,...,0,0,0,1,0,0,0,0,1,0
2586,adobo chicken pie,296649,55,704950,2008-04-05,"['60-minutes-or-less', 'time-to-make', 'course...","[271.8, 14.0, 23.0, 11.0, 26.0, 15.0, 12.0]",15,"['preheat oven to 400 degrees f', 'in a large ...",i love that chicken tamale pie at trader joe's...,...,0,0,0,0,1,0,0,0,0,1
2653,aebleskiver poffertjes,334674,75,613544,2008-11-03,"['danish', 'time-to-make', 'course', 'main-ing...","[51.3, 1.0, 7.0, 1.0, 3.0, 2.0, 3.0]",12,"['combine flours , salt , yeast , sugar and nu...",danish-dutch fusion cuisine. the recipe for th...,...,1,1,0,0,0,1,1,0,0,1
1055,2 ingredient toasted coconut macaroon cookie c...,399828,8,1431918,2009-11-17,"['15-minutes-or-less', 'time-to-make', 'course...","[291.4, 17.0, 172.0, 5.0, 11.0, 41.0, 14.0]",10,['pour around 3 / 4 of the can of condensed mi...,"these are great, really simple, really easy an...",...,0,0,0,1,0,0,0,0,0,0
705,holiday punch cheers,79736,15,58104,2003-12-29,"['15-minutes-or-less', 'time-to-make', 'course...","[170.4, 0.0, 86.0, 0.0, 1.0, 0.0, 8.0]",6,['combine first 6 ingredients in a large punch...,no you're not seeing stars yet! kidding aside ...,...,1,1,0,0,0,1,1,0,0,1


In [14]:
def combine_tags(row):
    categories = []
    if row['Weight Loss']:
        categories.append('Weight Loss')
    if row['Weight Gain']:
        categories.append('Weight Gain')
    if row['Muscle Building']:
        categories.append('Muscle Building')
    if row['Blood Pressure Management']:
        categories.append('Blood Pressure Management')
    if row['High Protein']:
        categories.append('High Protein')
    if row['Very High Protein']:
        categories.append('Very High Protein')
    if row['Low Sugar']:
        categories.append('Low Sugar')
    if row['Low Fat']:
        categories.append('Low Fat')
    if row['Low Calorie']:
        categories.append('Low Calorie')
    if row['High Calorie']:
        categories.append('High Calorie')
    if row['High Fat']:
        categories.append('High Fat')
    if row['Moderate Sodium']:
        categories.append('Moderate Sodium')
    if row['Low Sodium']:
        categories.append('Low Sodium')
    if not categories:
        categories.append('General') # Default category
    return ', '.join(categories)


In [15]:
import ast

# Define tag categories
time_tags = ['60-minutes-or-less','30-minutes-or-less','4-hours-or-less','15-minutes-or-less','1-day-or-more']
course_tags = ['main-dish', 'side-dishes', 'desserts', 'appetizers', 'salads', 'snacks', 'soups-stews', 'breakfast', 'lunch', 'dinner-party', 'brunch']
cuisine_tags = ['north-american', 'european', 'mexican', 'italian', 'asian', 'middle-eastern', 'greek', 'southern-united-states', 'indian', 'caribbean', 'african']
dietary_tags = ['vegetarian', 'vegan', 'low-carb', 'low-fat', 'low-sodium', 'gluten-free', 'low-calorie', 'diabetic', 'low-cholesterol']
ingredient_tags = ['vegetables', 'meat', 'seafood', 'fruit', 'eggs-dairy', 'pasta', 'poultry', 'beef', 'pork', 'nuts', 'cheese']
taste_mood_tags = ['sweet', 'savory', 'spicy', 'comfort-food', 'romantic', 'healthy', 'kid-friendly', 'beginner-cook']
occasion_tags = ['holiday-event', 'christmas', 'thanksgiving', 'valentines-day', 'summer', 'fall', 'winter', 'spring', 'picnic', 'to-go']
technique_tags = ['oven', 'stove-top', 'grilling', 'crock-pot-slow-cooker', 'broil', 'baking', 'stir-fry']
equipment_tags = ['food-processor-blender', 'small-appliance', 'refrigerator', 'microwave']
#health_tags = ['low-saturated-fat', 'high-calcium', 'high-protein', 'very-low-carbs', 'low-sugar']
specific_ingredients_tags = ['chocolate', 'berries', 'tropical-fruit', 'onions', 'citrus', 'potatoes', 'carrots', 'mushrooms']


# Function to filter tags into categories
def categorize_tags(tags, category_list):
    tags = ast.literal_eval(tags)  # Convert string representation of list to actual list
    return [tag for tag in tags if tag in category_list]

# Create new columns for each category
recipes_df['time_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, time_tags))
recipes_df['course_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, course_tags))
recipes_df['cuisine_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, cuisine_tags))
recipes_df['dietary_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, dietary_tags))
recipes_df['ingredient_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, ingredient_tags))
recipes_df['taste_mood_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, taste_mood_tags))
recipes_df['occasion_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, occasion_tags))
recipes_df['technique_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, technique_tags))
recipes_df['equipment_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, equipment_tags))
#recipes_df['health_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, health_tags))
recipes_df['specific_ingredients_tags'] = recipes_df['tags'].apply(lambda x: categorize_tags(x, specific_ingredients_tags))
recipes_df['Health_tags'] = recipes_df.apply(combine_tags, axis=1)

# Verify results
recipes_df[['tags', 'course_tags', 'cuisine_tags', 'dietary_tags','Health_tags']].head()


Unnamed: 0,tags,course_tags,cuisine_tags,dietary_tags,Health_tags
1501,"['60-minutes-or-less', 'time-to-make', 'course...",[breakfast],[north-american],[],"Muscle Building, High Protein, High Fat"
2586,"['60-minutes-or-less', 'time-to-make', 'course...",[main-dish],"[north-american, mexican]",[],"Blood Pressure Management, High Protein, Moder..."
2653,"['danish', 'time-to-make', 'course', 'main-ing...","[breakfast, brunch]",[european],[low-sodium],"Weight Loss, Blood Pressure Management, Low Fa..."
1055,"['15-minutes-or-less', 'time-to-make', 'course...",[desserts],[],[],High Fat
705,"['15-minutes-or-less', 'time-to-make', 'course...","[dinner-party, brunch]",[],"[low-fat, low-sodium, low-cholesterol, low-cal...","Weight Loss, Blood Pressure Management, Low Fa..."


In [16]:
recipes_df.head(2)

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,course_tags,cuisine_tags,dietary_tags,ingredient_tags,taste_mood_tags,occasion_tags,technique_tags,equipment_tags,specific_ingredients_tags,Health_tags
1501,49er flapjacks,400885,35,1396198,2009-11-23,"['60-minutes-or-less', 'time-to-make', 'course...","[287.6, 24.0, 29.0, 27.0, 24.0, 42.0, 8.0]",18,"['the night before your pancake meal , in smal...",almost a perfect reproduction of the original ...,...,[breakfast],[north-american],[],[],[],[],[],[],[],"Muscle Building, High Protein, High Fat"
2586,adobo chicken pie,296649,55,704950,2008-04-05,"['60-minutes-or-less', 'time-to-make', 'course...","[271.8, 14.0, 23.0, 11.0, 26.0, 15.0, 12.0]",15,"['preheat oven to 400 degrees f', 'in a large ...",i love that chicken tamale pie at trader joe's...,...,[main-dish],"[north-american, mexican]",[],"[poultry, meat]",[],[],[],[],[],"Blood Pressure Management, High Protein, Moder..."


In [35]:
filtered_recipes = recipes_df[
    recipes_df['ingredients'].apply(lambda x: 'eggs' in x) & 
    recipes_df['Health_tags'].apply(lambda x: 'Muscle Building' in x)
]

print(filtered_recipes[['ingredients', 'course_tags', 'Health_tags']])


                                              ingredients  \
1501    ['dry yeast', 'milk', 'butter', 'eggs', 'sugar...   
1144    ['cooked chicken', 'cooked rice', 'slivered al...   
2138    ['eggs', 'light brown sugar', 'sugar', 'vanill...   
1338    ['all-purpose flour', 'baking soda', 'salt', '...   
199     ['ground beef', 'eggs', 'ketchup', 'seasoned b...   
...                                                   ...   
231522  ['zucchini', 'salt', 'flour', 'scallion', 'but...   
231320  ['zucchini', 'salt', 'eggs', 'parmesan cheese'...   
230841  ['lasagna noodles', 'cottage cheese', 'montere...   
230949  ['zucchini', 'plain flour', 'caster sugar', 'b...   
231182  ['butter', 'zucchini', 'fresh tarragon', 'parm...   

                     course_tags  \
1501                 [breakfast]   
1144                 [main-dish]   
2138                  [desserts]   
1338                  [desserts]   
199                  [main-dish]   
...                          ...   
231522  [main-d

In [34]:
print(recipes_df[['ingredients', 'course_tags','Health_tags']])

                                              ingredients  \
1501    ['dry yeast', 'milk', 'butter', 'eggs', 'sugar...   
2586    ['cooked chicken breast', 'dry mustard', 'flou...   
2653    ['all-purpose flour', 'whole wheat flour', 'sa...   
1055    ['desiccated coconut', 'sweetened condensed mi...   
705     ['cranberry juice cocktail', 'unsweetened pine...   
...                                                   ...   
231022  ['eggs', 'unsweetened applesauce', 'vanilla ex...   
230168  ['light chunk tuna in water', 'baby carrots', ...   
230680  ['tilapia fillets', 'fresh coriander', 'fresh ...   
231182  ['butter', 'zucchini', 'fresh tarragon', 'parm...   
230602  ['hot pepper sauce', 'green peppers', 'ground ...   

                   course_tags  \
1501               [breakfast]   
2586               [main-dish]   
2653       [breakfast, brunch]   
1055                [desserts]   
705     [dinner-party, brunch]   
...                        ...   
231022     [breakfast, brunch

In [17]:
recipes_df.columns

Index(['name', 'id', 'minutes', 'contributor_id', 'submitted', 'tags',
       'nutrition', 'n_steps', 'steps', 'description', 'ingredients',
       'n_ingredients', 'time_tags', 'time_tags_str', 'processed_ingredients',
       'combined_features', 'calories', 'total fat (PDV)', 'sugar (PDV)',
       'sodium (PDV)', 'protein (PDV)', 'saturated fat (PDV)',
       'carbohydrates (PDV)', 'High Protein', 'Very High Protein', 'Low Sugar',
       'Low Fat', 'Low Calorie', 'High Calorie', 'High Fat', 'Moderate Sodium',
       'Low Sodium', 'Weight Loss', 'Weight Gain', 'Muscle Building',
       'Blood Pressure Management', 'course_tags', 'cuisine_tags',
       'dietary_tags', 'ingredient_tags', 'taste_mood_tags', 'occasion_tags',
       'technique_tags', 'equipment_tags', 'specific_ingredients_tags',
       'Health_tags'],
      dtype='object')

In [21]:
recipes_df

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,...,course_tags_features,cuisine_tags_features,dietary_tags_features,ingredient_tags_features,taste_mood_tags_features,occasion_tags_features,technique_tags_features,equipment_tags_features,Health_tags_features,specific_ingredients_tags_features
1501,49er flapjacks,400885,35,1396198,2009-11-23,"['60-minutes-or-less', 'time-to-make', 'course...","[287.6, 24.0, 29.0, 27.0, 24.0, 42.0, 8.0]",18,"['the night before your pancake meal , in smal...",almost a perfect reproduction of the original ...,...,breakfast,north-american,,,,,,,,
2586,adobo chicken pie,296649,55,704950,2008-04-05,"['60-minutes-or-less', 'time-to-make', 'course...","[271.8, 14.0, 23.0, 11.0, 26.0, 15.0, 12.0]",15,"['preheat oven to 400 degrees f', 'in a large ...",i love that chicken tamale pie at trader joe's...,...,main-dish,north-american mexican,,poultry meat,,,,,,
2653,aebleskiver poffertjes,334674,75,613544,2008-11-03,"['danish', 'time-to-make', 'course', 'main-ing...","[51.3, 1.0, 7.0, 1.0, 3.0, 2.0, 3.0]",12,"['combine flours , salt , yeast , sugar and nu...",danish-dutch fusion cuisine. the recipe for th...,...,breakfast brunch,european,low-sodium,eggs-dairy,healthy kid-friendly,,stove-top,,,
1055,2 ingredient toasted coconut macaroon cookie c...,399828,8,1431918,2009-11-17,"['15-minutes-or-less', 'time-to-make', 'course...","[291.4, 17.0, 172.0, 5.0, 11.0, 41.0, 14.0]",10,['pour around 3 / 4 of the can of condensed mi...,"these are great, really simple, really easy an...",...,desserts,,,fruit nuts,,,,,,
705,holiday punch cheers,79736,15,58104,2003-12-29,"['15-minutes-or-less', 'time-to-make', 'course...","[170.4, 0.0, 86.0, 0.0, 1.0, 0.0, 8.0]",6,['combine first 6 ingredients in a large punch...,no you're not seeing stars yet! kidding aside ...,...,dinner-party brunch,,low-fat low-sodium low-cholesterol low-calorie,fruit nuts,healthy spicy comfort-food sweet,holiday-event christmas valentines-day,,refrigerator,,berries citrus tropical-fruit
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231022,zucchini blueberry bread lighter version,181155,65,55594,2006-08-09,"['time-to-make', 'course', 'preparation', 'occ...","[154.6, 1.0, 85.0, 5.0, 5.0, 1.0, 11.0]",10,"['preheat oven to 350', 'grease 2 large loaf p...",a light zucchini bread with blueberries. the ...,...,breakfast brunch,,low-fat,,healthy,,,,,
230168,yummy tuna wraps,64788,15,69838,2003-06-17,"['15-minutes-or-less', 'time-to-make', 'course...","[153.7, 7.0, 8.0, 16.0, 44.0, 5.0, 1.0]",10,"['open your can of tuna and drain well', 'empt...",these tuna wraps really satisfy my tuna cravin...,...,lunch main-dish,,,seafood,beginner-cook,to-go,,,,
230680,zingy roasted fish with lemon herb couscous,86525,35,130849,2004-03-13,"['60-minutes-or-less', 'time-to-make', 'course...","[307.1, 4.0, 11.0, 3.0, 66.0, 4.0, 12.0]",13,"['clean fish and pat dry', 'make 3 diagonal sc...",this one will really give your taste buds a ki...,...,main-dish,,,seafood,,,,,,
231182,zucchini gratin,15309,50,9869,2001-12-05,"['60-minutes-or-less', 'time-to-make', 'course...","[400.6, 46.0, 31.0, 25.0, 41.0, 88.0, 4.0]",14,"['preheat oven to 350f degrees', 'butter 9 inc...",creamy vegetable dish,...,side-dishes,,low-calorie low-carb,eggs-dairy vegetables cheese,,,oven,,,


In [18]:
# A one stop code for Recommendation based on all tags.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import ast

# Function to combine selected tags and features into a single text string
def combine_selected_features(row, tag_columns):
    combined_str = ''
    for col in tag_columns:
        if col in row:
            combined_str += ' '.join(row[col]) + ' '
    return combined_str.strip()

# Define the tag categories you want to use
tag_columns_to_use = [
    'time_tags', 'course_tags', 'cuisine_tags', 'dietary_tags',
    'ingredient_tags', 'taste_mood_tags', 'occasion_tags',
    'technique_tags', 'equipment_tags', 'Health_tags', 'specific_ingredients_tags'
]

# Combine selected tags into a single feature column
recipes_df['selected_features'] = recipes_df.apply(
    lambda row: combine_selected_features(row, tag_columns_to_use), axis=1
)

# Use CountVectorizer to vectorize the combined features
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(' '))
selected_features_matrix = vectorizer.fit_transform(recipes_df['selected_features'])

# Compute cosine similarity
similarity_matrix = cosine_similarity(selected_features_matrix)

# Convert similarity matrix to a DataFrame for easier lookup
similarity_df = pd.DataFrame(similarity_matrix, index=recipes_df.index, columns=recipes_df.index)

# Function to get top N similar recipes based on combined tags
def get_similar_recipes(recipe_index, top_n=25):
    # Sort recipes by similarity score
    similar_scores = similarity_df[recipe_index].sort_values(ascending=False)
    # Exclude the recipe itself and return top N recommendations
    similar_recipes = similar_scores.drop(recipe_index).head(top_n)
    return similar_recipes

# Test the recommendation system with a sample recipe
sample_recipe_index = recipes_df.index[0]  # Replace with a specific recipe ID if needed
recommended_recipes = get_similar_recipes(sample_recipe_index, top_n=25)

# Print recommendations
print(f"Top {len(recommended_recipes)} similar recipes for Recipe {recipes_df.iloc[sample_recipe_index]['name']} (Index: {sample_recipe_index}):")
print(recommended_recipes)

# Access details of recommended recipes
print("\nDetails of recommended recipes:")
print(recipes_df.loc[recommended_recipes.index, ['name', 'selected_features']])




Top 25 similar recipes for Recipe baked shrimp toasts (Index: 1501):
168107    0.995864
128664    0.995418
228039    0.995418
78550     0.995418
78418     0.994169
103752    0.993358
52178     0.993194
225626    0.993194
45151     0.993194
106258    0.992723
181052    0.992723
79139     0.992723
168049    0.992630
127328    0.992630
197809    0.992630
170676    0.992630
181197    0.992630
197893    0.992630
166831    0.992630
146739    0.992630
230432    0.992630
161419    0.991891
74153     0.991891
13686     0.991891
76338     0.991780
Name: 1501, dtype: float64

Details of recommended recipes:
                                                     name  \
168107                          quick   easy mexican soup   
128664                make ahead breakfast burritos  oamc   
228039             wonderful homemade chicken noodle soup   
78550                                  easy vegetable pie   
78418                              easy tomato basil tart   
103752                        

In [19]:
# A Genre Type View Of Recommendation

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import ast

# Function to combine features for a specific group of tags
def combine_group_features(row, tag_column):
    if tag_column in row and isinstance(row[tag_column], list):
        return ' '.join(row[tag_column])
    return ''

# Function to calculate and return recommendations for a specific group
def get_recommendations_for_group(recipes_df, tag_column, recipe_index, top_n=25):
    # Combine features for the specified tag column
    recipes_df[f'{tag_column}_features'] = recipes_df.apply(
        lambda row: combine_group_features(row, tag_column), axis=1
    )
    
    # Vectorize the group-specific features
    vectorizer = CountVectorizer(tokenizer=lambda x: x.split(' '))
    features_matrix = vectorizer.fit_transform(recipes_df[f'{tag_column}_features'])
    
    # Compute cosine similarity
    similarity_matrix = cosine_similarity(features_matrix)
    
    # Convert similarity matrix to DataFrame
    similarity_df = pd.DataFrame(similarity_matrix, index=recipes_df.index, columns=recipes_df.index)
    
    # Get top N similar recipes
    similar_scores = similarity_df[recipe_index].sort_values(ascending=False)
    similar_recipes = similar_scores.drop(recipe_index).head(top_n)
    return similar_recipes

# Example: Generate recommendations for each group of tags
tag_groups = [
    'time_tags', 'course_tags', 'cuisine_tags', 'dietary_tags',
    'ingredient_tags', 'taste_mood_tags', 'occasion_tags',
    'technique_tags', 'equipment_tags', 'Health_tags', 'specific_ingredients_tags'
]

# Recipe index to test
sample_recipe_index = recipes_df.index[0]  # Replace with your recipe index
all_recommendations = {}

for group in tag_groups:
    try:
        recommendations = get_recommendations_for_group(
            recipes_df, group, sample_recipe_index, top_n=10
        )
        all_recommendations[group] = recommendations
        print(f"\nRecommendations based on {group}:")
        print(recipes_df.loc[recommendations.index, ['name', group]])
    except ValueError as e:
        print(f"\nNo valid features for {group}: {e}")

# Access all recommendations
print("\nSummary of Recommendations:")
for group, recs in all_recommendations.items():
    print(f"\n{group} Recommendations:")
    print(recipes_df.loc[recs.index, ['name']])





Recommendations based on time_tags:
                                            name             time_tags
110177                    i can t wait for chole  [60-minutes-or-less]
114392  japanese crispy fried chicken   kara age  [60-minutes-or-less]
112509         italian christmas cookies  filled  [60-minutes-or-less]
111323         individual sweet potato crostatas  [60-minutes-or-less]
111242           individual breakfast casseroles  [60-minutes-or-less]
112450        italian chicken and vegetable soup  [60-minutes-or-less]
112753                   italian mashed potatoes  [60-minutes-or-less]
113197                         italian struffoli  [60-minutes-or-less]
112623           italian garlic roasted potatoes  [60-minutes-or-less]
114995                                jerk wings  [60-minutes-or-less]





Recommendations based on course_tags:
                                                     name  course_tags
53574                           cinnamon breakfast apples  [breakfast]
220676                            vanilla ricotta muffins  [breakfast]
28673                             breakfast tofu smoothie  [breakfast]
57586   copy cat   cb hash brown casserole by todd wilbur  [breakfast]
165227                potatoes  with everything  pancakes  [breakfast]
222754                          venison breakfast sausage  [breakfast]
84686                                    flamenco eggs ii  [breakfast]
53957                       cinnamon rolls    kanelbullar  [breakfast]
28429                               breakfast burrito  ww  [breakfast]
202778                     sugar hill inn s raisin scones  [breakfast]





Recommendations based on cuisine_tags:
                                                    name      cuisine_tags
90711                 garlic  parmesan stuffed mushrooms  [north-american]
94209  good ol  summertime rhubarb custard dessert wi...  [north-american]
91553          gestoofde runderlappen   scallops of beef  [north-american]
93849                            golden fried tofu bites  [north-american]
94797       grammy s easy potluck mixed veggie casserole  [north-american]
90655                           garlic stuffed mushrooms  [north-american]
93024                      gluten free blueberry muffins  [north-american]
93321                        gluten free cranberry bread  [north-american]
94724                               gramma s apple crisp  [north-american]
92220                                       ginger water  [north-american]





Recommendations based on dietary_tags:
                                                     name dietary_tags
133585                          melting pot cheese fondue           []
132413                  meat and macaroni pie   pastitsio           []
131600                          mary jarrell s pound cake           []
133021                       mediterranean fish casserole           []
133543   melt in your mouth chocolate peanut butter fudge           []
130926  marinated greek olives with grape tomatoes and...           []
133049                  mediterranean hummus turkey wraps           []
130642       margie s potato salad  with italian dressing           []
130501                                       marc s chops           []
134066                            mexican chicken pot pie           []





Recommendations based on ingredient_tags:
                                                     name ingredient_tags
81255                                   ex boyfriend shot              []
177217       rosemary garlic biscuits in a bag  for gifts              []
80733                      english muffin casserole bread              []
175626                        roasted garlic on the grill              []
80297                             emerald  chicken  salad              []
82339                        farro  apple and pecan salad              []
80653   endive with goat cheese  fig and honey glazed ...              []
83774                                fig and bourbon fizz              []
84520                              five flavor pound cake              []
177826                              rum fruitcake cookies              []





Recommendations based on taste_mood_tags:
                                                    name taste_mood_tags
85023                             flori s hot tub heaven              []
192696                          southwest chicken bundle              []
86103             french bread with chipotle lime butter              []
85168   flourless peanut butter  pumpkin butter pancakes              []
135254                         microwave sugar cream pie              []
87570              fried chicken by my dad  jos rodrigue              []
87377          fresh tomato sauce with fennel and orange              []
193418                                   spaghetti kugel              []
89257                                   fudgies  no bake              []
88862                    fruit salad with orange liqueur              []





Recommendations based on occasion_tags:
                                        name occasion_tags
143911  night crossing soup  pressure cooker            []
141293                   mustard glazed fish            []
144058              no bake corn flake chews            []
144253         no fail sour cream pound cake            []
141197  mussels with chili  garlic and basil            []
143825   nif s pretty bell pepper rice pilaf            []
140555         murray s poor man s breakfast            []
140805                    mushroom meatballs            []
140893                         mushroom ritz            []
141183                      mussels marinara            []





Recommendations based on technique_tags:
                                                     name technique_tags
143729  niederrheinische sttchen  lower rhenish sweet ...             []
141323                               mustard pickled eggs             []
142292                                        my tabouleh             []
144310                                 no hassle meatloaf             []
144392                                 no roll cherry pie             []
143235                 new age pimento cheese with chives             []
144120                            no bake strawberry cake             []
142232                              my scalloped potatoes             []
142031                       my mom s delicious pecan pie             []
141541                                  my chicken malibu             []





Recommendations based on equipment_tags:
                                                     name equipment_tags
154838                                     payday cookies             []
154076  pasta with pancetta  broccoli or broccoli rabe...             []
152483                         pap  potatoes and cornmeal             []
153653                                    pasta e fagioli             []
150999                   oven roasted santa maria tri tip             []
153145    parmesan crusted chicken on bed of fancy greens             []
150911                           oven meatballs and gravy             []
153979              pasta with creamy bacon and pea sauce             []
153425                          passion fruit vinaigrette             []
152357                                      panfried fish             []





Recommendations based on Health_tags:
                                                     name  \
152845                              parmesan cheese rolls   
150080                               oriental pasta salad   
153673                              pasta e  pisella soup   
152890                           parmesan cornflaked fish   
150177                             original fantasy fudge   
153912                      pasta with bacon tomato sauce   
152254  pancetta   boursin polenta with sauteed mixed ...   
152907                           parmesan crusted chicken   
152053               pan steaks with garlic mustard sauce   
151872  pampered chef style apple crisp  for microwave...   

                                              Health_tags  
152845  Weight Loss, Blood Pressure Management, Low Fa...  
150080         Blood Pressure Management, Moderate Sodium  
153673                                       High Protein  
152890  Weight Loss, High Protein, Very High Prot




Recommendations based on specific_ingredients_tags:
                                                  name  \
147323                      old fashioned rice pudding   
149816                           oregano dijon chicken   
146057        oatmeal and bananas breakfast  baby food   
147351        old style mustard and rosemary asparagus   
145240                               not chopped liver   
147094                         old fashioned tea cakes   
147123  old mexican inn dip  serve with tortilla chips   
148601                       orange and lemon tea fizz   
149209                                 orange pancakes   
145079                            north woods pancakes   

       specific_ingredients_tags  
147323                        []  
149816                        []  
146057                        []  
147351                        []  
145240                        []  
147094                        []  
147123                        []  
148601                      

In [39]:
def generate_daily_diet_plan_debug(goal, preferred_ingredients):
    """
    Generate a personalized full-day diet plan based on user goal and preferences.
    Includes debug prints to check intermediate steps.

    :param goal: User's dietary goal (e.g., "Weight Gain", "Muscle Building")
    :param preferred_ingredients: List of preferred ingredients
    :return: A DataFrame with suggestions for each course tag
    """
    daily_plan = []
    course_tags = [
        'breakfast', 'lunch', 'dinner-party', 'snacks', 'appetizers', 'desserts'
    ]  # Modify this list as needed

    print(f"Generating diet plan for goal: {goal} and preferred ingredients: {preferred_ingredients}\n")

    for course in course_tags:
        print(f"Processing course: {course}...")
        course_data = recipes_df[
            (recipes_df['course_tags'].apply(lambda x: course in x if isinstance(x, list) else False)) &
            (recipes_df['Health_tags'].apply(lambda x: goal in x if isinstance(x, str) else False)) &
            (recipes_df['ingredients'].apply(
                lambda x: any(ing in str(x) for ing in preferred_ingredients))
            )
        ]

        print(f"Found {len(course_data)} recipes for {course}")

        if not course_data.empty:
            # Select the top recipe for this course based on calories or user preferences
            selected_recipe = course_data.sort_values(
                by=['calories', 'protein (PDV)'], ascending=[False, False]
            ).iloc[0]
            daily_plan.append({
                'Course': course.capitalize(),
                'Recipe Name': selected_recipe['name'],
                #'Categories': selected_recipe['categories'],
                'Calories': selected_recipe['calories'],
                'Protein': selected_recipe['protein (PDV)'],
                'Ingredients': selected_recipe['ingredients']
            })
    
    if daily_plan:
        print("Successfully generated the diet plan.")
    else:
        print("No recipes matched the criteria. Please adjust your inputs.")

    return pd.DataFrame(daily_plan)


In [40]:
# Example usage
user_goal = "Muscle Building"
user_preferred_ingredients = ["eggs"]

# Generate the diet plan with debugging
daily_diet_plan = generate_daily_diet_plan_debug(user_goal, user_preferred_ingredients)

# Print the diet plan chart to the console
print("\nFull-Day Diet Plan Based on Course Tags")
if not daily_diet_plan.empty:
    print(daily_diet_plan.to_string(index=False))
else:
    print("No recipes found for the given goal and preferences.")


Generating diet plan for goal: Muscle Building and preferred ingredients: ['eggs']

Processing course: breakfast...
Found 344 recipes for breakfast
Processing course: lunch...
Found 161 recipes for lunch
Processing course: dinner-party...
Found 161 recipes for dinner-party
Processing course: snacks...
Found 26 recipes for snacks
Processing course: appetizers...
Found 71 recipes for appetizers
Processing course: desserts...
Found 282 recipes for desserts
Successfully generated the diet plan.

Full-Day Diet Plan Based on Course Tags
      Course                    Recipe Name  Calories  Protein                                                                                                                                                                                                                                                                                                                                                                                                                 

In [43]:
daily_diet_plan.head()

Unnamed: 0,Course,Recipe Name,Calories,Protein,Ingredients
0,Breakfast,french toast bake,4348.9,253.0,"['white bread', 'cream cheese', 'eggs', 'milk'..."
1,Lunch,carrots raisins coconut bread,6408.3,165.0,"['eggs', 'vegetable oil', 'vanilla', 'carrots'..."
2,Dinner-party,the ultimate carrot cake,11811.4,272.0,"['whole wheat flour', 'flour', 'baking soda', ..."
3,Snacks,carrots raisins coconut bread,6408.3,165.0,"['eggs', 'vegetable oil', 'vanilla', 'carrots'..."
4,Appetizers,traditional antipasti platter,10805.8,1232.0,"['prosciutto', 'salami', 'capicola', 'soppress..."


In [42]:
from tabulate import tabulate
def display_table(dataframe, message=""):
    """
    Display the DataFrame in a neat table format.
    :param dataframe: Pandas DataFrame to display.
    :param message: Additional message to display.
    """
    if dataframe.empty:
        print(f"\n{message} No data available to display.")
    else:
        print(f"\n{message}")
        print(tabulate(dataframe, headers='keys', tablefmt='grid', showindex=False))

# Example usage
user_goal = "Muscle Building"
user_preferred_ingredients = ["eggs"]

# Generate the diet plan with debugging
daily_diet_plan = generate_daily_diet_plan_debug(user_goal, user_preferred_ingredients)

# Display the diet plan in table format
display_table(daily_diet_plan, "Full-Day Diet Plan Based on Course Tags")

Generating diet plan for goal: Muscle Building and preferred ingredients: ['eggs']

Processing course: breakfast...
Found 344 recipes for breakfast
Processing course: lunch...
Found 161 recipes for lunch
Processing course: dinner-party...
Found 161 recipes for dinner-party
Processing course: snacks...
Found 26 recipes for snacks
Processing course: appetizers...
Found 71 recipes for appetizers
Processing course: desserts...
Found 282 recipes for desserts
Successfully generated the diet plan.

Full-Day Diet Plan Based on Course Tags
+--------------+--------------------------------+------------+-----------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------