In [1]:
import pandas as pd
import ast
import os

## User verification: confirm where the data is being loaded from

In [9]:
# Folder that holds the RecipeNLG CSV. Set the RECIPE_DATASET_DIR environment
# variable to point the notebook at a different copy of the data; when the
# variable is unset the original local folder is used, so behaviour is unchanged.
dataset_directory = os.environ.get(
    "RECIPE_DATASET_DIR",
    r"D:\sample_project_1\Recipe Ai model\archive",
)
csv_filename = "RecipeNLG_dataset.csv"
recipes_df_full_path = os.path.join(dataset_directory, csv_filename)
# Echo the resolved path so the reader can verify which file will be read.
print(f"Attempting to load data from: {recipes_df_full_path}")


Attempting to load data from: D:\sample_project_1\Recipe Ai model\archive\RecipeNLG_dataset.csv


In [10]:
def safe_literal_eval(s):
    """Parse the text form of a Python list literal into a real list.

    Returns an empty list when ``s`` cannot be parsed or when it parses to
    something that is not a list/tuple (e.g. a bare number or string), so the
    caller can always iterate over the result as a list of ingredients.
    """
    try:
        parsed = ast.literal_eval(s)
    except (ValueError, SyntaxError, TypeError):
        return []
    if isinstance(parsed, (list, tuple)):
        return list(parsed)
    return []


# Start from an empty frame so later cells can rely on `recipes_df.empty`
# even when loading or cleaning fails.
recipes_df = pd.DataFrame()
try:
    df_loaded = pd.read_csv(recipes_df_full_path, nrows=50000)
    print(f"Successfully loaded {len(df_loaded)} recipes.")
    print("\n gonna try checking it with the head here ")
    print(df_loaded.head())

    print("\n also gonna check the coloumn names")
    print(df_loaded.columns)
    print("\n some information about dataset")
    # info() writes its report itself and returns None, so it must not be
    # wrapped in print() (that would emit a stray "None" line).
    df_loaded.info()

    ## Data cleaning and preparation
    title_column_name = "title"
    ner_column_name = "NER"
    relevant_columns = [title_column_name, ner_column_name]
    missing_columns = [col for col in relevant_columns if col not in df_loaded.columns]

    if df_loaded.empty:
        print("Dataset is empty, cannot proceed with cleaning.")
    elif missing_columns:
        # Previously this case was reported as "Dataset is empty", which hid
        # the real problem (an unexpected CSV schema).
        print(f"Required column(s) {missing_columns} not found in dataset, cannot proceed with cleaning.")
    else:
        print(f"\nUsing '{title_column_name}' for recipe titles and '{ner_column_name}' for ingredients.")
        df_subset = df_loaded[relevant_columns]
        print(f"\nMissing values before cleaning in NER: {df_subset[ner_column_name].isnull().sum()}")
        print(f"Missing values before cleaning in title: {df_subset[title_column_name].isnull().sum()}")
        # Build a new frame instead of mutating in place; the explicit copy
        # lets us add a column below without touching df_loaded.
        df_subset = df_subset.dropna(subset=relevant_columns).copy()
        print(f"Number of recipes after dropping NA in NER/title: {len(df_subset)}")

        print("\n Converting 'NER' column into lists...")
        # Parse the stringified list, then normalise every entry to a
        # stripped, lower-cased string and drop blank entries.
        df_subset['ingredients_list'] = (
            df_subset[ner_column_name]
            .apply(safe_literal_eval)
            .apply(lambda ingredients: [str(ing).strip().lower() for ing in ingredients if str(ing).strip()])
        )
        print("\nFirst 5 rows with processed 'ingredients_list':")
        print(df_subset[[title_column_name, ner_column_name, 'ingredients_list']].head())

        # Keep only the necessary columns for the recommender. The title
        # column is exposed as 'title' because later cells read row['title'].
        recipes_df = (
            df_subset[[title_column_name, 'ingredients_list']]
            .rename(columns={title_column_name: 'title'})
            .reset_index(drop=True)  # Reset index after potential drops
        )

        print(f"\nPrepared dataset with {len(recipes_df)} recipes.")
        print(recipes_df.head())
except Exception as e:
    # Deliberate best-effort: report the problem and leave recipes_df empty so
    # the rest of the notebook can detect that no data is available.
    print(f"Error while loading data: {e}")

Error while loading data: [Errno 2] No such file or directory: 'D:\\sample_project_1\\Recipe Ai model\\archive\\RecipeNLG_dataset.csv'


In [4]:
# --- Cell 2: Define Dietary Keywords & NLP Function ---

# Define sets of ingredients for common restrictions
# Using sets for faster lookups
# Keyword sets used for dietary filtering. Plain sets give constant-time
# membership tests; the entries are lower-case to match the cleaned data.
MEAT_INGREDIENTS = {
    # red meat and game
    'beef', 'pork', 'lamb', 'veal', 'mutton', 'venison', 'liver', 'steak', 'ribs', 'mince',
    'ground beef', 'ground pork', 'ground lamb',
    # poultry
    'chicken', 'turkey', 'duck', 'goose', 'giblets', 'ground turkey',
    # cured / processed meat
    'bacon', 'sausage', 'ham', 'pate', 'meatball', 'meatloaf', 'salami', 'pepperoni',
    'chorizo', 'prosciutto', 'hot dog', 'frankfurter', 'biltong', 'jerky',
    # fish
    'fish', 'salmon', 'tuna', 'cod', 'tilapia', 'halibut', 'trout', 'sardine', 'mackerel',
    # shellfish
    'shrimp', 'crab', 'lobster', 'clam', 'oyster', 'mussel', 'scallop',
}

# Gluten keywords are intentionally broad, so expect both false positives and
# false negatives: a dependable gluten filter needs explicit tagging in the data.
# Ingredients whose text says "gluten-free" / "gluten free" are exempted by the
# filter in recommend_recipes.
GLUTEN_KEYWORDS_IN_INGREDIENTS = {
    # grains
    'wheat', 'barley', 'rye', 'semolina', 'spelt', 'farro', 'triticale', 'kamut', 'einkorn',
    'bulgur', 'couscous',
    # baked goods and staples ("flour" is very broad: it also matches gluten-free flours)
    'bread', 'pasta', 'noodle', 'flour', 'bagel', 'muffin', 'croissant', 'cracker',
    'biscuit', 'pretzel',
    # other sources (brewer's yeast: only some forms)
    'malt', 'seitan', "brewer's yeast",
}

DAIRY_INGREDIENTS = {
    'milk', 'cheese', 'butter', 'yogurt', 'cream', 'casein', 'whey', 'ghee',
    'condensed milk', 'evaporated milk', 'ice cream', 'sour cream', 'creme fraiche', 'buttermilk',
    'paneer', 'ricotta', 'parmesan', 'cheddar', 'mozzarella', 'feta',
}

# Mayonnaise is listed because it typically contains eggs.
EGG_INGREDIENTS = {'egg', 'eggs', 'mayonnaise'}

# --- NLP Function to Get User Profile ---
def get_user_profile():
    """Interactively build a user profile from two console prompts.

    The first prompt is scanned for dietary-restriction keywords; the second
    takes an optional comma-separated list of ingredients the user has.

    Returns:
        dict with three list-valued keys:
        - 'restrictions': any of 'vegetarian', 'vegan', 'gluten-free'
          ('vegan' always brings 'vegetarian' with it).
        - 'preferences': always empty for now (kept for future use).
        - 'available_ingredients': lower-cased, stripped, de-duplicated
          ingredient names in the order they were typed.

    Note: matching is plain substring search, so negations such as
    "not vegan" are still detected as the restriction itself.
    """
    print("\nTell me about your preferences!")
    user_input_text = input("Any dietary restrictions (e.g., vegetarian, vegan, gluten-free) or preferences? ")
    # Normalise case, hyphens and repeated whitespace so that "Gluten-Free",
    # "gluten free" and "gluten  free" are all matched by the same keyword.
    normalized_input = ' '.join(user_input_text.lower().replace('-', ' ').split())

    profile = {
        'restrictions': [],
        'preferences': [],  # We are not heavily using this yet due to data limitations
        'available_ingredients': []
    }

    # Keywords for restrictions, written in the normalised (hyphen-less) form.
    vegetarian_keywords = ('vegetarian', 'veggie', 'meat free', 'meatless', 'no meat')
    vegan_keywords = ('vegan', 'no animal products', 'plant based')
    gluten_free_keywords = ('gluten free', 'no gluten', 'celiac', 'coeliac')

    def mentions(keywords):
        return any(keyword in normalized_input for keyword in keywords)

    if mentions(vegetarian_keywords):
        profile['restrictions'].append('vegetarian')
    if mentions(vegan_keywords):
        profile['restrictions'].append('vegan')
        if 'vegetarian' not in profile['restrictions']:  # Vegan implies vegetarian
            profile['restrictions'].append('vegetarian')
    if mentions(gluten_free_keywords):
        profile['restrictions'].append('gluten-free')

    ingredients_input_str = input("Enter any ingredients you have (comma-separated, e.g., chicken, tomato, onion) or press Enter: ")
    if ingredients_input_str:
        cleaned = (ing.strip().lower() for ing in ingredients_input_str.split(','))
        # dict.fromkeys drops repeated entries while keeping the typed order.
        profile['available_ingredients'] = list(dict.fromkeys(ing for ing in cleaned if ing))

    print(f"User Profile Created: {profile}")
    return profile

# Test the function (optional, you can call it later in the main loop)
# current_profile = get_user_profile()
# print(current_profile)

In [5]:
# --- Cell 3: Recommendation Engine ---
import pandas as pd # Ensure pandas is imported if you run cells separately

def _mentions_any(ingredients_list, keywords):
    """Return True if any keyword occurs as a substring of any ingredient string."""
    return any(keyword in ing for ing in ingredients_list for keyword in keywords)


def _is_likely_gluten_free(ingredients_list, gluten_keywords):
    """Approximate gluten check: False as soon as an ingredient not labelled gluten-free contains a gluten keyword."""
    for ing_full_text in ingredients_list:
        # An ingredient that names itself gluten-free is assumed safe.
        if "gluten-free" in ing_full_text or "gluten free" in ing_full_text:
            continue
        if any(gluten_keyword in ing_full_text for gluten_keyword in gluten_keywords):
            return False
    return True


def _ingredient_match_score(recipe_ingredients_list, user_ingredients_set):
    """Count how many of the user's ingredients appear (as substrings) in at least one recipe ingredient."""
    if not isinstance(recipe_ingredients_list, list):
        return 0
    return sum(
        1
        for user_ing in user_ingredients_set
        if any(user_ing in r_ing for r_ing in recipe_ingredients_list)
    )


def recommend_recipes(user_profile, local_recipes_df, top_n=5):
    """Filter recipes by dietary restrictions, rank them by ingredient overlap and return the best few.

    Args:
        user_profile: dict as produced by get_user_profile. The keys
            'restrictions' and 'available_ingredients' are read; a missing or
            empty key is treated as "none given".
        local_recipes_df: DataFrame with a 'title' column and an
            'ingredients_list' column holding lists of lower-case strings.
            It is not modified.
        top_n: maximum number of recipes to return (default 5).

    Returns:
        DataFrame with 'title', 'ingredients_list' and, when the user supplied
        ingredients, 'ingredient_score'. An empty DataFrame is returned when no
        recipe survives the filters.

    Caveat: all matching is substring based, so e.g. 'egg' also matches
    'eggplant'. The filters err on the side of excluding recipes.
    """
    if local_recipes_df.empty:
        print("Recipe data is empty. Cannot make recommendations.")
        return pd.DataFrame()

    restrictions = user_profile.get('restrictions') or []
    available_ingredients = user_profile.get('available_ingredients') or []

    candidate_recipes = local_recipes_df.copy()
    print(f"\nStarting with {len(candidate_recipes)} recipes for recommendation.")

    # 1. Filter by Dietary Restrictions
    for restriction in restrictions:
        original_count = len(candidate_recipes)
        ingredient_lists = candidate_recipes['ingredients_list']

        if restriction == 'vegetarian':
            keep_mask = ingredient_lists.apply(lambda ings: not _mentions_any(ings, MEAT_INGREDIENTS))
            label, note = "'vegetarian'", ""
        elif restriction == 'vegan':
            animal_keywords = MEAT_INGREDIENTS | DAIRY_INGREDIENTS | EGG_INGREDIENTS
            keep_mask = ingredient_lists.apply(lambda ings: not _mentions_any(ings, animal_keywords))
            label, note = "'vegan'", ""
        elif restriction == 'gluten-free':
            # CAUTION: This is a basic and potentially inaccurate gluten filter.
            keep_mask = ingredient_lists.apply(
                lambda ings: _is_likely_gluten_free(ings, GLUTEN_KEYWORDS_IN_INGREDIENTS)
            )
            label, note = "'gluten-free' (approximate)", " Note: This filter is basic."
        else:
            # Make an unsupported restriction visible instead of silently skipping it.
            print(f"Unrecognized restriction '{restriction}' ignored.")
            continue

        candidate_recipes = candidate_recipes[keep_mask]
        print(f"After {label} filter: {len(candidate_recipes)} recipes remain (removed {original_count - len(candidate_recipes)}).{note}")

        if candidate_recipes.empty:
            print(f"No recipes left after '{restriction}' filter. Try broader criteria.")
            return pd.DataFrame()

    # 2. Score and Sort by Available Ingredients
    if available_ingredients:
        available_ingredients_set = set(available_ingredients)
        scores = candidate_recipes['ingredients_list'].apply(
            lambda r_ing_list: _ingredient_match_score(r_ing_list, available_ingredients_set)
        )
        # assign() builds a new frame; writing the column onto the filtered
        # slice directly can raise pandas' SettingWithCopyWarning.
        candidate_recipes = candidate_recipes.assign(ingredient_score=scores)

        # Filter out recipes that don't use any of the available ingredients
        candidate_recipes = candidate_recipes[candidate_recipes['ingredient_score'] > 0]
        if candidate_recipes.empty:
            print(f"No recipes found containing any of your specified ingredients: {', '.join(sorted(available_ingredients_set))}")
            return pd.DataFrame()

        # A stable sort keeps equally scored recipes in dataset order.
        candidate_recipes = candidate_recipes.sort_values(
            by='ingredient_score', ascending=False, kind='mergesort'
        )
        print(f"After scoring by available ingredients: {len(candidate_recipes)} recipes. Top scores shown first.")
    else:
        # Shuffle if no ingredients, so repeated searches show different recipes.
        candidate_recipes = candidate_recipes.sample(frac=1).reset_index(drop=True)
        print("No specific ingredients provided; recommendations will not be specifically scored by ingredient match (results might be shuffled).")

    if candidate_recipes.empty:
        # Defensive: every path above already returns when nothing is left.
        print("\nSorry, no recipes found matching all your criteria. Try broadening your search!")
        return pd.DataFrame()

    display_cols = ['title', 'ingredients_list']
    if available_ingredients:
        display_cols.append('ingredient_score')

    # Only select columns that actually exist in the candidate frame.
    final_display_cols = [col for col in display_cols if col in candidate_recipes.columns]
    return candidate_recipes[final_display_cols].head(top_n)

# Test the recommendation (optional, call it in the main loop)
# Assuming recipes_df is your main DataFrame from Cell 1
# test_profile = {'restrictions': ['vegetarian'], 'preferences': [], 'available_ingredients': ['cheese', 'tomato']}
# if 'recipes_df' in globals() and not recipes_df.empty:
#    test_recommendations = recommend_recipes(test_profile, recipes_df)
#    print("\nTest Recommendations:")
#    print(test_recommendations)
# else:
#    print("recipes_df is not defined or empty. Cannot run test recommendation.")

In [7]:
# --- Cell 4: Main Interaction Loop ---

def main():
    """Run the interactive recipe-suggestion loop on the console.

    Requires a non-empty global ``recipes_df`` (built by the data-loading
    cell); otherwise an error is printed and the function returns at once.
    Each round asks for a profile via get_user_profile, prints the
    suggestions from recommend_recipes, and then asks whether to search
    again. Any answer other than "yes" or "y" (case and surrounding
    whitespace ignored) ends the session.
    """
    print("Welcome to the Personalized Recipe Suggester! 🍲")

    # Ensure recipes_df is loaded and available (it should be from Cell 1)
    if 'recipes_df' not in globals() or recipes_df.empty:
        print("ERROR: The recipe data ('recipes_df') is not loaded or is empty.")
        print("Please ensure Cell 1 (Data Loading) has been run successfully.")
        return

    while True:
        current_user_profile = get_user_profile()  # From Cell 2

        # From Cell 3, using recipes_df from Cell 1
        recommendations = recommend_recipes(current_user_profile, recipes_df)

        if not recommendations.empty:
            print("\nHere are some recipe suggestions for you:")
            # The score is only meaningful when the user listed ingredients.
            show_score = (
                'ingredient_score' in recommendations.columns
                and bool(current_user_profile.get('available_ingredients'))
            )
            for _, row in recommendations.iterrows():
                print("------------------------------------------")
                print(f"✨ Recipe: {row['title']} ✨")

                # Format ingredients for better readability; str() guards
                # against a stray non-string entry breaking the join.
                ingredients_str = ", ".join(str(ing) for ing in row['ingredients_list'])
                print(f"  Ingredients: {ingredients_str}")

                if show_score:
                    print(f"  ⭐ Matched {int(row['ingredient_score'])} of your ingredients.")
            print("------------------------------------------")
        # Message for no recipes is handled within recommend_recipes

        # Accept "y" as well as "yes" and ignore stray whitespace, so a small
        # typing variation does not end the session unexpectedly.
        another_search = input("\nDo you want to search again? (yes/no): ").strip().lower()
        if another_search not in ('yes', 'y'):
            print("Thanks for using the Recipe Suggester! Happy cooking! 😊")
            break

if __name__ == '__main__':
    main()

Welcome to the Personalized Recipe Suggester! 🍲
ERROR: The recipe data ('recipes_df') is not loaded or is empty.
Please ensure Cell 1 (Data Loading) has been run successfully.
