In [207]:
import pandas as pd

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.metrics.pairwise import cosine_similarity

In [208]:
def load_recipes(path='RAW_recipes.csv'):
    """Load the recipe dataset from a CSV file.

    Parameters
    ----------
    path : str or path-like, default 'RAW_recipes.csv'
        Location of the CSV file. The default keeps the original behavior of
        reading ``RAW_recipes.csv`` from the working directory.

    Returns
    -------
    pandas.DataFrame
        The parsed recipes. Rows that cannot be parsed (for example rows with
        too many fields) are skipped instead of raising an error.
    """
    # on_bad_lines='skip' drops malformed rows rather than aborting the load.
    return pd.read_csv(path, on_bad_lines='skip')

# load_recipes
- Load the data from the file
- Skip bad lines

In [209]:
def preprocess_data(recipes_df):
    """Vectorize the ``ingredients`` column into a recipe-by-ingredient matrix.

    Every entry of ``recipes_df['ingredients']`` is split on commas; each piece
    is trimmed, lower-cased and normalized, and the resulting tokens are counted
    with a ``CountVectorizer``.

    Parameters
    ----------
    recipes_df : pandas.DataFrame
        Must contain an ``ingredients`` column holding comma-separated text.
        Missing values are treated as recipes with no ingredients.

    Returns
    -------
    ingredient_matrix : sparse matrix
        One row per recipe, one column per distinct ingredient token.
    vectorizer : CountVectorizer
        The fitted vectorizer; use its ``transform`` to encode other
        comma-separated ingredient strings against the same vocabulary.
    """
    def strip_list_markup(token):
        # Fallback normalizer: trims list/quote punctuation around a token.
        # Presumably the column may hold stringified lists such as
        # "['flour', 'eggs']" -- TODO confirm against the dataset.
        return token.strip(" []'\"")

    # `normalize_ingredient` is not defined by this function. Use the
    # kernel's definition when there is one; otherwise fall back so that a
    # fresh "Restart & Run All" does not fail with a NameError.
    try:
        normalize = normalize_ingredient
    except NameError:
        normalize = strip_list_markup

    def token_func(x):
        # Split on bare commas (pieces are stripped afterwards) so that
        # "eggs,flour" and "eggs, flour" tokenize identically.
        tokens = (normalize(piece.strip().lower()) for piece in x.split(','))
        # Drop blanks produced by empty entries or trailing commas.
        return [token for token in tokens if token]

    # token_pattern=None silences the warning scikit-learn raises when a
    # custom tokenizer makes the default token pattern unused.
    vectorizer = CountVectorizer(tokenizer=token_func, token_pattern=None)

    # fillna('') keeps NaN cells from aborting the fit; rows are recipes and
    # columns are ingredients.
    ingredient_matrix = vectorizer.fit_transform(recipes_df['ingredients'].fillna(''))

    return ingredient_matrix, vectorizer

# preprocess_data
- token_func breaks down each recipe into smaller parts
    - Helps divide up the list of ingredients so the inventory can later be compared with each recipe
- Convert the text into tokens for handling

In [210]:
def get_inventory():
    """Prompt the user for the ingredients they have and return them as a list.

    The answer is split on commas; each item is stripped of surrounding
    whitespace and lower-cased. Blank items (from an empty answer, a trailing
    comma, or doubled commas) are discarded.

    Returns
    -------
    list of str
        The cleaned ingredient names, in the order they were typed. Empty when
        the user enters nothing usable.
    """
    ingredients = input("Enter the ingredients you have. Ensure to put commas after each one. Example: eggs, flour, tomatoes.\n")

    cleaned_items = (item.strip().lower() for item in ingredients.split(','))
    # The prompt asks for a comma after each ingredient, so a trailing comma
    # is expected; without this filter it would yield an empty ingredient.
    inventory_of_ingredients = [item for item in cleaned_items if item]

    return inventory_of_ingredients

# get_inventory
- Strip spaces
- Lowercase ingredients
- Split into a list

In [211]:
def match_inventory_to_recipes(inventory, recipes_df, top_n=10):
    """Return the recipes whose ingredients best match the user's inventory.

    The recipes are vectorized with ``preprocess_data``, the inventory is
    encoded with the same vectorizer, and recipes are ranked by cosine
    similarity to the inventory vector.

    Parameters
    ----------
    inventory : list of str
        Ingredient names the user has.
    recipes_df : pandas.DataFrame
        Recipe table; must contain the ``ingredients`` column used by
        ``preprocess_data``.
    top_n : int, default 10
        Maximum number of recipes to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``recipes_df`` ordered from most to least
        similar. Only recipes sharing at least one ingredient token are
        included. When nothing matches, a random sample of up to ``top_n``
        recipes is returned instead and a notice is printed.
    """
    top_n = max(int(top_n), 0)
    ingredient_matrix, vectorizer = preprocess_data(recipes_df)

    # The tokenizer splits on commas, so the inventory must be comma-joined;
    # joining with spaces would fuse all ingredients into a single token.
    user_vector = vectorizer.transform([', '.join(inventory)])
    # One query row, so take row 0 to get one score per recipe.
    scores = cosine_similarity(user_vector, ingredient_matrix)[0]

    matched = scores.nonzero()[0]
    if matched.size == 0:
        # No recipe shares any ingredient, so the suggestions are random.
        print("No matching recipes found in dataset. Below are some random recipes to try instead.")
        # Cap the sample size so small datasets do not raise.
        return recipes_df.sample(min(top_n, len(recipes_df)))

    # Rank only the recipes that actually matched, so zero-similarity rows
    # never pad the result. The stable sort keeps ties in dataset order.
    order = (-scores[matched]).argsort(kind='stable')
    top_indices = matched[order][:top_n]

    return recipes_df.iloc[top_indices]


def display_recommendations(recommended_recipes):
    """Print the name of each recommended recipe as a bulleted list.

    Parameters
    ----------
    recommended_recipes : pandas.DataFrame
        Recipes to show; the ``name`` column is printed, one recipe per line.
        An empty frame prints a "no recipes" notice instead.
    """
    # Guard clause: nothing to list.
    if recommended_recipes.empty:
        print("There are no recipes matching your inventory.")
        return

    print("Based on your inventory, here are some recommended recipes:")
    for recipe_name in recommended_recipes['name']:
        print(f"- {recipe_name}")

# match_inventory_to_recipes
- Change inventory into a vector
- Cosine similarity for vector and matrix
- Check for matches and provide partial matches if needed

In [212]:
# Main: run the recommendation pipeline end to end.
# Load the recipe dataset into a DataFrame.
recipes_df = load_recipes()

# Prompt the user for the ingredients they have on hand.
user_inventory = get_inventory()

# Select the recipes that best match the user's inventory.
recommended_recipes = match_inventory_to_recipes(user_inventory, recipes_df)

# Print the names of the selected recipes.
display_recommendations(recommended_recipes)

# Debugging aids (uncomment to inspect the loaded data):
# print(recipes_df.columns)
# print(recipes_df.head())

Enter the ingredients you have. Ensure to put commas after each one. Example: eggs, flour, tomatoes.
 tomatoes




Based on your inventory, here are some recommended recipes:
- hot italian tomato and cream cheese dip
- dump soup
- taquitos casserole
- a can of this and a can of that crock pot chili
- carne machaca
- balsamic tomato aspic
- slow cooked hearty chili
- unique chicken tacos
- ziti baked with spinach  tomatoes  and smoked gouda
- chipotle bean dip
