In [1]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!mkdir -p ~/.kaggle
!cp /content/drive/MyDrive/PersonalProjects/RecipesRecommendation/kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


In [3]:
!kaggle datasets download -d hugodarwood/epirecipes
!unzip epirecipes.zip -d epirecipes


Dataset URL: https://www.kaggle.com/datasets/hugodarwood/epirecipes
License(s): unknown
Downloading epirecipes.zip to /content
  0% 0.00/11.3M [00:00<?, ?B/s]
100% 11.3M/11.3M [00:00<00:00, 779MB/s]
Archive:  epirecipes.zip
  inflating: epirecipes/epi_r.csv    
  inflating: epirecipes/full_format_recipes.json  
  inflating: epirecipes/recipe.py    
  inflating: epirecipes/utils.py     


### Data Loading and Understanding the Dataset

In [4]:
import pandas as pd
recipes = pd.read_csv('epirecipes/epi_r.csv')
print(recipes.head())


                                         title  rating  calories  protein  \
0              Lentil, Apple, and Turkey Wrap    2.500     426.0     30.0   
1  Boudin Blanc Terrine with Red Onion Confit    4.375     403.0     18.0   
2                Potato and Fennel Soup Hodge    3.750     165.0      6.0   
3             Mahi-Mahi in Tomato Olive Sauce    5.000       NaN      NaN   
4                    Spinach Noodle Casserole    3.125     547.0     20.0   

    fat  sodium  #cakeweek  #wasteless  22-minute meals  3-ingredient recipes  \
0   7.0   559.0        0.0         0.0              0.0                   0.0   
1  23.0  1439.0        0.0         0.0              0.0                   0.0   
2   7.0   165.0        0.0         0.0              0.0                   0.0   
3   NaN     NaN        0.0         0.0              0.0                   0.0   
4  32.0   452.0        0.0         0.0              0.0                   0.0   

   ...  yellow squash  yogurt  yonkers  yuca  zucc

In [5]:

print("Dataset shape:", recipes.shape)
print("Columns count:", len(recipes.columns))




Dataset shape: (20052, 680)
Columns count: 680


In [6]:

print("First 5 columns:", list(recipes.columns[:5]))
print("Last 5 columns:", list(recipes.columns[-5:]))



First 5 columns: ['title', 'rating', 'calories', 'protein', 'fat']
Last 5 columns: ['cookbooks', 'leftovers', 'snack', 'snack week', 'turkey']


In [7]:
metadata_cols = ['title', 'rating', 'calories', 'protein', 'fat', 'sodium']
ingredient_cols = [col for col in recipes.columns if col not in metadata_cols]

print(f"Number of ingredient columns: {len(ingredient_cols)}")


Number of ingredient columns: 674


In [8]:

# sparsity check: % of zeros in ingredient columns (how often ingredients are absent)
ingredient_data = recipes[ingredient_cols]
zero_percentage = (ingredient_data == 0).sum().sum() / ingredient_data.size * 100
print(f"Ingredient matrix sparsity (percent zeros): {zero_percentage:.2f}%")


Ingredient matrix sparsity (percent zeros): 98.19%


In [9]:
# count of ingredients per recipe (sum of 1s per row)
recipes['ingredient_count'] = ingredient_data.sum(axis=1)
print("Ingredients per recipe stats:")
print(recipes['ingredient_count'].describe())

Ingredients per recipe stats:
count    20052.000000
mean        12.197536
std          5.058298
min          1.000000
25%          8.000000
50%         11.000000
75%         16.000000
max         37.000000
Name: ingredient_count, dtype: float64


### Problem Definition and Initial Data Exploration

#### Problem Definition

- **Input:** List of ingredients that the user currently has.  
- **Output:** List of best matching recipes ranked by ingredient relevance and quality metrics such as rating and nutritional information.  
- **Goal:** To recommend realistic and high-quality recipes that the user can prepare based on their available ingredients, ensuring good user satisfaction.


#### Data Exploration & Cleaning

In [10]:
# 1. Check missing values in key columns
print("Missing values in key columns:")
print(recipes[['rating', 'calories', 'protein', 'fat', 'sodium']].isnull().sum())


Missing values in key columns:
rating         0
calories    4117
protein     4162
fat         4183
sodium      4119
dtype: int64


In [11]:

# 2. Fill missing ratings with median rating
median_rating = recipes['rating'].median()
recipes['rating'] = recipes['rating'].fillna(median_rating)
print(f"Filled missing ratings with median value: {median_rating}")



Filled missing ratings with median value: 4.375


In [12]:

# 3. Ingredient columns identification
metadata_cols = ['title', 'rating', 'calories', 'protein', 'fat', 'sodium']
ingredient_cols = [col for col in recipes.columns if col not in metadata_cols]



In [13]:
# 4. Ingredient distribution: sum across recipes (how common each ingredient is)
ingredient_counts = recipes[ingredient_cols].sum().sort_values(ascending=False)
print("\nTop 10 most common ingredients:")
print(ingredient_counts.head(10))

print("\nTop 10 rarest ingredients:")
print(ingredient_counts.tail(10))




Top 10 most common ingredients:
ingredient_count    244585.0
bon appétit           9355.0
peanut free           8390.0
soy free              8088.0
tree nut free         7044.0
vegetarian            6846.0
gourmet               6648.0
kosher                6175.0
pescatarian           6042.0
quick & easy          5372.0
dtype: float64

Top 10 rarest ingredients:
cr��me de cacao      1.0
dorie greenspan      1.0
costa mesa           1.0
camping              1.0
bulgaria             1.0
caviar               1.0
crêpe                1.0
burrito              1.0
beverly hills        1.0
tested & improved    1.0
dtype: float64


In [14]:
# 5. Rating distribution
print("\nRating statistics:")
print(recipes['rating'].describe())




Rating statistics:
count    20052.000000
mean         3.714467
std          1.340829
min          0.000000
25%          3.750000
50%          4.375000
75%          4.375000
max          5.000000
Name: rating, dtype: float64


In [15]:
# 6. Calories distribution
print("\nCalories statistics:")
print(recipes['calories'].describe())




Calories statistics:
count    1.593500e+04
mean     6.322958e+03
std      3.590460e+05
min      0.000000e+00
25%      1.980000e+02
50%      3.310000e+02
75%      5.860000e+02
max      3.011122e+07
Name: calories, dtype: float64


In [16]:
# 7. Protein distribution
print("\nProtein statistics:")
print(recipes['protein'].describe())




Protein statistics:
count     15890.000000
mean        100.160793
std        3840.318527
min           0.000000
25%           3.000000
50%           8.000000
75%          27.000000
max      236489.000000
Name: protein, dtype: float64


In [17]:
# 8. Fat distribution
print("\nFat statistics:")
print(recipes['fat'].describe())



Fat statistics:
count    1.586900e+04
mean     3.468775e+02
std      2.045611e+04
min      0.000000e+00
25%      7.000000e+00
50%      1.700000e+01
75%      3.300000e+01
max      1.722763e+06
Name: fat, dtype: float64


In [18]:

# 9. Sodium distribution
print("\nSodium statistics:")
print(recipes['sodium'].describe())


Sodium statistics:
count    1.593300e+04
mean     6.225975e+03
std      3.333182e+05
min      0.000000e+00
25%      8.000000e+01
50%      2.940000e+02
75%      7.110000e+02
max      2.767511e+07
Name: sodium, dtype: float64


### Feature Engineering

In [19]:
import numpy as np

# 1. Identify ingredient columns (exclude metadata)
metadata_cols = ['title', 'rating', 'calories', 'protein', 'fat', 'sodium']
non_ingredient_cols = [
    'title', 'rating', 'calories', 'protein', 'fat', 'sodium',
    '#cakeweek', '#wasteless', '22-minute meals', '3-ingredient recipes',
    'bon appétit', 'gourmet', 'winter', 'soup/stew', 'sauté', 'easy',
    'low-fat', 'dairy', 'pasta', 'pork', 'herb', 'blue cheese', 'vegetarian',
    'vegan', 'snack', 'leftovers', 'cookbooks', 'weeknight'
    # Add more as you find them
]

ingredient_cols = [col for col in recipes.columns if col not in non_ingredient_cols]

print(f"Number of ingredient columns: {len(ingredient_cols)}")



Number of ingredient columns: 656


In [20]:
# 2. Create 'total_ingredients' feature: Count of ingredients used in each recipe
recipes['total_ingredients'] = recipes[ingredient_cols].sum(axis=1)

# 3. Normalize nutritional info for scoring later (min-max scaling)
def min_max_scale(series):
    return (series - series.min()) / (series.max() - series.min())

recipes['rating_norm'] = min_max_scale(recipes['rating'])
recipes['calories_norm'] = min_max_scale(recipes['calories'].fillna(recipes['calories'].median()))
recipes['protein_norm'] = min_max_scale(recipes['protein'].fillna(recipes['protein'].median()))
recipes['fat_norm'] = min_max_scale(recipes['fat'].fillna(recipes['fat'].median()))
recipes['sodium_norm'] = min_max_scale(recipes['sodium'].fillna(recipes['sodium'].median()))

# 4. Create a 'health_score' combining nutrition metrics
# higher protein good (+), lower calories, fat, sodium good (-)
recipes['health_score'] = (recipes['protein_norm']) - (recipes['calories_norm'] + recipes['fat_norm'] + recipes['sodium_norm']) / 3
# Normalize health_score to 0-1
recipes['health_score_norm'] = min_max_scale(recipes['health_score'])

# 5. Check feature summaries
print("\nFeature summaries:")
print(recipes[['total_ingredients', 'rating_norm', 'calories_norm', 'protein_norm', 'fat_norm', 'sodium_norm', 'health_score_norm']].describe())



Feature summaries:
       total_ingredients   rating_norm  calories_norm  protein_norm  \
count       20052.000000  20052.000000   20052.000000  20052.000000   
mean           22.579593      0.742893       0.000169      0.000343   
std             9.751953      0.268166       0.010630      0.014456   
min             1.000000      0.000000       0.000000      0.000000   
25%            15.000000      0.750000       0.000008      0.000017   
50%            21.000000      0.875000       0.000011      0.000034   
75%            30.000000      0.875000       0.000017      0.000080   
max            72.000000      1.000000       1.000000      1.000000   

           fat_norm   sodium_norm  health_score_norm  
count  20052.000000  20052.000000       20052.000000  
mean       0.000161      0.000181           0.146912  
std        0.010563      0.010736           0.009553  
min        0.000000      0.000000           0.000000  
25%        0.000006      0.000004           0.146752  
50%       

### Recommendation Algorithm Design

Our recommendation score combines three key components:

1. **Ingredient Match Score:**  
   The number of ingredients from the user's input that appear in a recipe.

2. **Ingredient Coverage Ratio:**  
   The proportion of the recipe’s required ingredients that are matched by the user’s input.  
   (i.e., matched ingredients divided by total ingredients in the recipe)

3. **Recipe Quality Score:**  
   A normalized score based on recipe rating (and optionally nutrition).

The combined scoring formula is:

$$
\text{final_score} = \alpha \times \text{ingredient_match_score} + \beta \times \text{ingredient_coverage} + \gamma \times \text{rating_score}
$$

Where $\alpha, \beta, and \gamma$  are tunable weights to balance the importance of each component.



In [21]:
def recommend_recipes(user_ingredients, df, alpha=0.6, beta=0.3, gamma=0.1, top_n=10):
    # Filter valid ingredients from user input
    valid_ingredients = [ing for ing in user_ingredients if ing in df.columns]
    if not valid_ingredients:
        return "No valid ingredients found."

    # Calculate ingredient match count per recipe
    df['ingredient_match_count'] = df[valid_ingredients].sum(axis=1)

    # Calculate ingredient coverage ratio
    df['ingredient_coverage'] = df['ingredient_match_count'] / df['total_ingredients']

    # Normalize rating (already done in feature engineering as rating_norm)
    # Combine scores
    df['final_score'] = (alpha * df['ingredient_match_count'] +
                         beta * df['ingredient_coverage'] +
                         gamma * df['rating_norm'])

    # Filter recipes with at least one match
    filtered_df = df[df['ingredient_match_count'] > 0]

    # Sort and return top results
    return filtered_df.sort_values(by='final_score', ascending=False).head(top_n)[
        ['title', 'ingredient_match_count', 'ingredient_coverage', 'rating', 'final_score']
    ]


In [22]:
user_ingredients = ['tomato', 'cheese', 'basil', 'olive oil']
recommendations = recommend_recipes(user_ingredients, recipes)
print(recommendations)


                                                   title  \
8159                       Sausage and Cheese Manicotti    
4022   Tomatoes Stuffed with Fresh Mozzarella and Basil    
12326       Garlicky Eggplant, Tomato and Basil Bobolis    
19527                       BA's Best Eggplant Parmesan    
12814               Eggplant, Tomato, and Fontina Pizza    
2189                Eggplant, Tomato, and Fontina Pizza    
14746                  Roasted Yellow Pepper Bruschetta    
7927   Grilled Pizza with Tomato, Mozzarella, and Basil    
7996             Fusilli with Chère, Tomatoes and Basil    
7807        Eggplant, Tomato and Goat Cheese Sandwiches    

       ingredient_match_count  ingredient_coverage  rating  final_score  
8159                      3.0             0.200000   4.375     1.947500  
4022                      3.0             0.187500   4.375     1.943750  
12326                     3.0             0.166667   4.375     1.937500  
19527                     3.0             0

In [23]:
for i, row in recommendations.iterrows():
    print(f"{i+1}. {row['title']} - Score: {row['final_score']:.3f}, Matched Ingredients: {int(row['ingredient_match_count'])}, Coverage: {row['ingredient_coverage']:.2%}, Rating: {row['rating']}")


8160. Sausage and Cheese Manicotti  - Score: 1.947, Matched Ingredients: 3, Coverage: 20.00%, Rating: 4.375
4023. Tomatoes Stuffed with Fresh Mozzarella and Basil  - Score: 1.944, Matched Ingredients: 3, Coverage: 18.75%, Rating: 4.375
12327. Garlicky Eggplant, Tomato and Basil Bobolis  - Score: 1.937, Matched Ingredients: 3, Coverage: 16.67%, Rating: 4.375
19528. BA's Best Eggplant Parmesan  - Score: 1.937, Matched Ingredients: 3, Coverage: 16.67%, Rating: 4.375
12815. Eggplant, Tomato, and Fontina Pizza  - Score: 1.937, Matched Ingredients: 3, Coverage: 16.67%, Rating: 4.375
2190. Eggplant, Tomato, and Fontina Pizza  - Score: 1.937, Matched Ingredients: 3, Coverage: 16.67%, Rating: 4.375
14747. Roasted Yellow Pepper Bruschetta  - Score: 1.937, Matched Ingredients: 3, Coverage: 16.67%, Rating: 4.375
7928. Grilled Pizza with Tomato, Mozzarella, and Basil  - Score: 1.935, Matched Ingredients: 3, Coverage: 20.00%, Rating: 3.75
7997. Fusilli with Chère, Tomatoes and Basil  - Score: 1.935,

### Evaluation & Tuning

In [24]:
def evaluate_recommendations(user_ingredients, df, alpha, beta, gamma, top_n=10):
    recs = recommend_recipes(user_ingredients, df, alpha, beta, gamma, top_n)
    if recs.empty:
        print("No recommendations found.")
        return None
    avg_rating = recs['rating'].mean()
    avg_coverage = recs['ingredient_coverage'].mean()
    print(f"Average rating of top {top_n} recommendations: {avg_rating:.2f}")
    print(f"Average ingredient coverage: {avg_coverage:.2%}")
    return recs

# Example evaluation with ingredients
evaluate_recommendations(['chicken', 'garlic', 'onion'], recipes, alpha=0.6, beta=0.3, gamma=0.1)



Average rating of top 10 recommendations: 4.44
Average ingredient coverage: 17.68%


Unnamed: 0,title,ingredient_match_count,ingredient_coverage,rating,final_score
9725,Chicken with Tomatillo Sauce and Braised Fruit,3.0,0.230769,4.375,1.956731
1626,"Tofu, Fried, with Pork and Black-Bean Sauce (P...",3.0,0.1875,5.0,1.95625
7856,Braised Lamb Shanks with Prunes,3.0,0.157895,5.0,1.947368
6760,Chicken Fricassée with Creamy Sweet-and-Sour D...,3.0,0.230769,3.75,1.944231
7697,Brooklyn Feijoada,3.0,0.1875,4.375,1.94375
7693,Chicken and Cashew Stir-Fry,3.0,0.157895,4.375,1.934868
13764,Noodle Soup with Soy-Cooked Pork (Slack Season...,3.0,0.157895,4.375,1.934868
15088,Chicken and Artichoke Stew,3.0,0.157895,4.375,1.934868
11453,Pollo en Pipian Verde,3.0,0.15,4.375,1.9325
5685,Smoked Chicken Chowder,3.0,0.15,4.375,1.9325


In [25]:
from itertools import combinations

def recommend_with_subsets(user_ingredients, df, min_match=1, max_missing=1, alpha=0.6, beta=0.3, gamma=0.1, top_n=10):
    pantry_ingredients = ['water', 'oil', 'salt', 'pepper']
    pantry_cols = [ing for ing in pantry_ingredients if ing in df.columns]

    results = pd.DataFrame()

    # To avoid duplicates, keep a set of seen recipe indices
    seen_indices = set()

    # Check subsets from largest to smallest (to prioritize bigger matches)
    for r in range(len(user_ingredients), min_match-1, -1):
        for subset in combinations(user_ingredients, r):
            valid_ingredients = [ing for ing in subset if ing in df.columns]
            if not valid_ingredients:
                continue

            temp_df = df.copy()
            temp_df['ingredient_match_count'] = temp_df[valid_ingredients].sum(axis=1)
            temp_df['pantry_ingredient_count'] = temp_df[pantry_cols].sum(axis=1) if pantry_cols else 0
            temp_df['missing_ingredients'] = temp_df['total_ingredients'] - temp_df['ingredient_match_count'] - temp_df['pantry_ingredient_count']

            filtered_df = temp_df[temp_df['missing_ingredients'] <= max_missing]

            filtered_df['ingredient_coverage'] = filtered_df['ingredient_match_count'] / filtered_df['total_ingredients']
            filtered_df['final_score'] = (alpha * filtered_df['ingredient_match_count'] +
                                         beta * filtered_df['ingredient_coverage'] +
                                         gamma * filtered_df['rating_norm'])

            # Exclude recipes already seen
            filtered_df = filtered_df[~filtered_df.index.isin(seen_indices)]

            # Keep track of indices to avoid duplicates
            seen_indices.update(filtered_df.index)

            results = pd.concat([results, filtered_df])

    # Sort final combined results by final_score descending and get top_n
    results_sorted = results.sort_values('final_score', ascending=False).head(top_n)

    return results_sorted[['title', 'ingredient_match_count', 'missing_ingredients', 'ingredient_coverage', 'rating', 'final_score']]


In [26]:
user_ingredients = ['onion', 'chicken', 'garlic', 'tomatoes']
recommendations = recommend_with_subsets(user_ingredients, recipes, min_match=1, max_missing=1, top_n=10)
print(recommendations)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ingredient_coverage'] = filtered_df['ingredient_match_count'] / filtered_df['total_ingredients']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['final_score'] = (alpha * filtered_df['ingredient_match_count'] +
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ingr

                                             title  ingredient_match_count  \
14622                     All-Butter Pastry Dough                      0.0   
10551  Pomegranate and Fennel Glazed Rack of Lamb                      0.0   
9126                                Ras-El-Hanout                      0.0   
9096                                 Simple Syrup                      0.0   
18534       Gingered Steak and Napa Cabbage Salad                      0.0   
10114                                Simple Syrup                      0.0   
5346                                Ras-El-Hanout                      0.0   
16498                  Pierogi and Vareniki Dough                      0.0   
1666                                    Gremolata                      0.0   
10331                                   Gremolata                      0.0   

       missing_ingredients  ingredient_coverage  rating  final_score  
14622                  1.0                  0.0   5.000       0.1000  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ingredient_coverage'] = filtered_df['ingredient_match_count'] / filtered_df['total_ingredients']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['final_score'] = (alpha * filtered_df['ingredient_match_count'] +
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['ingr

In [27]:
def recommend_recipes_with_missing(user_ingredients, df, alpha=0.6, beta=0.3, gamma=0.1, top_n=10):
    """
    Recommend recipes and show missing ingredients to complete each recipe.
    """
    valid_ingredients = [ing for ing in user_ingredients if ing in df.columns]
    if not valid_ingredients:
        print("No valid ingredients found.")
        return pd.DataFrame()

    # Calculate match count
    df['ingredient_match_count'] = df[valid_ingredients].sum(axis=1)

    # Calculate missing ingredients (list)
    def get_missing_ingredients(row):
        recipe_ings = [col for col in df.columns if col not in ['title', 'rating', 'calories', 'protein', 'fat', 'sodium',
                                                              'ingredient_match_count', 'missing_ingredients_list',
                                                              'ingredient_coverage', 'rating_norm', 'final_score',
                                                              'total_ingredients']]
        missing = [ing for ing in recipe_ings if row[ing] == 1 and ing not in valid_ingredients]
        return missing

    df['missing_ingredients_list'] = df.apply(get_missing_ingredients, axis=1)

    # Calculate coverage ratio
    df['ingredient_coverage'] = df['ingredient_match_count'] / df['total_ingredients']

    # Normalize rating if not done yet
    if 'rating_norm' not in df.columns:
        max_rating = df['rating'].max()
        min_rating = df['rating'].min()
        df['rating_norm'] = (df['rating'] - min_rating) / (max_rating - min_rating)

    # Final scoring
    df['final_score'] = (alpha * df['ingredient_match_count'] +
                         beta * df['ingredient_coverage'] +
                         gamma * df['rating_norm'])

    # Filter to recipes with at least one matched ingredient
    filtered_df = df[df['ingredient_match_count'] > 0]

    # Sort and pick top_n
    top_recipes = filtered_df.sort_values('final_score', ascending=False).head(top_n)

    # Select relevant columns including missing ingredients list
    return top_recipes[['title', 'ingredient_match_count', 'missing_ingredients_list', 'ingredient_coverage', 'rating', 'final_score']]


In [28]:
user_ingredients = ['onion', 'cheese', 'eggs', 'tomato']
recommendations = recommend_recipes_with_missing(user_ingredients, recipes)
for idx, row in recommendations.iterrows():
    print(f"{idx+1}. {row['title']}")
    print(f"   Matched ingredients: {row['ingredient_match_count']}")
    print(f"   Missing ingredients: {', '.join(row['missing_ingredients_list']) if row['missing_ingredients_list'] else 'None'}")
    print(f"   Coverage: {row['ingredient_coverage']:.2%}")
    print(f"   Rating: {row['rating']}")
    print(f"   Score: {row['final_score']:.3f}\n")


4190. Three-Cheese Fondue with Tomato Onion Chutney 
   Matched ingredients: 3.0
   Missing ingredients: gourmet, winter
   Coverage: 37.50%
   Rating: 5.0
   Score: 2.012

17671. Handmade Pasta with Pancetta, Cherry Tomatoes, and Herbs 
   Matched ingredients: 3.0
   Missing ingredients: bon appétit, dairy, herb, pasta, pork
   Coverage: 27.27%
   Rating: 5.0
   Score: 1.982

7509. Chile-Marinated Pork Sandwiches on Cemita Rolls 
   Matched ingredients: 3.0
   Missing ingredients: avocado, broil, gourmet, hot pepper, marinate, pork, sandwich, sauté
   Coverage: 15.79%
   Rating: 5.0
   Score: 1.947

19872. Artichoke and Feta Cheese Pizza 
   Matched ingredients: 3.0
   Missing ingredients: bake, bon appétit, feta, quick & easy, spring, vegetarian
   Coverage: 18.75%
   Rating: 4.375
   Score: 1.944

8373. Linguine with Spicy Leek and Tomato Sauce 
   Matched ingredients: 3.0
   Missing ingredients: bon appétit, dairy, garlic, kid-friendly, pasta, quick & easy, sauté, vegetarian
   Cov

#### some UI stuff to test the code

In [29]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Input widget for ingredients
ingredients_input = widgets.Text(
    value='eggs, cheese, onion',
    placeholder='Enter ingredients separated by commas',
    description='Ingredients:',
    disabled=False,
    layout=widgets.Layout(width='70%')
)

# Slider to select number of recipes to show
top_n_slider = widgets.IntSlider(
    value=5,
    min=1,
    max=20,
    step=1,
    description='Top N:',
    continuous_update=False,
    layout=widgets.Layout(width='50%')
)

# Button to trigger recommendations
recommend_button = widgets.Button(description="Get Recommendations")

# Output widget to show results
output = widgets.Output()

def on_recommend_clicked(b):
    with output:
        clear_output()
        user_ings = [i.strip().lower() for i in ingredients_input.value.split(',') if i.strip()]
        top_n = top_n_slider.value
        if not user_ings:
            print("Please enter at least one ingredient.")
            return
        results = recommend_recipes_with_missing(user_ings, recipes, top_n=top_n)
        if results.empty:
            print("No recipes found matching your ingredients.")
            return
        for i, row in results.iterrows():
            print(f"{i+1}. {row['title']}")
            print(f"   Matched: {int(row['ingredient_match_count'])}, Missing: {', '.join(row['missing_ingredients_list']) or 'None'}")
            print(f"   Coverage: {row['ingredient_coverage']:.2%}, Rating: {row['rating']:.2f}")
            print(f"   Score: {row['final_score']:.3f}\n")

recommend_button.on_click(on_recommend_clicked)

# Display the UI
display(ingredients_input, top_n_slider, recommend_button, output)


Text(value='eggs, cheese, onion', description='Ingredients:', layout=Layout(width='70%'), placeholder='Enter i…

IntSlider(value=5, continuous_update=False, description='Top N:', layout=Layout(width='50%'), max=20, min=1)

Button(description='Get Recommendations', style=ButtonStyle())

Output()