In [18]:
import pandas as pd
import pymongo

# Load the cleaned dataset
df = pd.read_csv("my_cleaned_recipe_dataset.csv")

# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["recipe_db"]
collection = db["recipes"]

# Insert recipes into MongoDB
recipes = df.to_dict(orient="records")
collection.insert_many(recipes)

print("Recipes successfully stored in MongoDB!")


Recipes successfully stored in MongoDB!


In [19]:
# Retrieve one recipe from the database
sample_recipe = collection.find_one()
print(sample_recipe)



{'_id': ObjectId('67c97ff332173461d0fb8694'), 'name': 'Thayir Semiya Recipe (Curd Semiya)', 'image_url': 'https://www.archanaskitchen.com/images/archanaskitchen/1-Author/Raksha_Kamat/Thayir_Curd_Semiya_recipe_Yogurt_Vermicelli_South_indian_Lunch_recipe-4.jpg', 'description': 'Thayir Semiya or Curd Vermicelli is a quick dish which you can make for lunch. If you are bored of eating curd rice everyday, you can always make this Thayir semiya or curd semiya for a change. In South India, people consume curd everyday.\xa0', 'cuisine': 'Indian', 'course': 'Lunch', 'diet': 'Vegetarian', 'prep_time': 35.0, 'ingredients': '1/2 cup Semiya (Vermicelli) , roasted 1 cup Curd (Dahi / Yogurt) For tempering 1 teaspoon Mustard seeds (Rai/ Kadugu) 1/2 teaspoon White Urad Dal (Split) pinch Asafoetida (hing) 1 sprig Curry leaves 2 teaspoon Oil Raw Peanuts (Moongphali) 1 sprig Curry leaves 2 teaspoon Oil 5 to 6 Cashew nuts , for garnishing', 'instructions': 'To begin making the Thayir Semiya recipe, firstly 

In [20]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import pymongo

# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["recipe_db"]
collection = db["recipes"]

# Load recipes from MongoDB (fetching only necessary columns)
recipes = list(collection.find({}, {"_id": 0, "name": 1, "ingredients": 1, "instructions": 1}))
df = pd.DataFrame(recipes)

# Convert ingredient lists to strings (for TF-IDF processing)
df["ingredients_str"] = df["ingredients"].apply(lambda x: " ".join(x) if isinstance(x, list) else str(x))

# Convert ingredient lists to TF-IDF Vectors
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df["ingredients_str"])


In [22]:
def search_recipes_with_substitutes(user_ingredients, top_n=5):
    # Expand user ingredients with possible substitutes
    expanded_ingredients = set(user_ingredients)
    for ing in user_ingredients:
        if ing in ingredient_substitutes:
            expanded_ingredients.update(ingredient_substitutes[ing])
    
    # Convert expanded user input into TF-IDF format
    user_query = " ".join(expanded_ingredients)
    query_vector = vectorizer.transform([user_query])

    # Compute similarity scores
    similarities = cosine_similarity(query_vector, tfidf_matrix)[0]

    # Calculate match scores
    match_scores = []
    for idx, sim_score in enumerate(similarities):
        recipe_ingredients = set(df.iloc[idx]["ingredients"])
        
        # Count matching & substitute ingredients
        match_count = sum(1 for ing in expanded_ingredients if ing in recipe_ingredients)
        missing_count = len(user_ingredients) - match_count
        
        # Final score: Similarity Score + Ingredient Match Count - Missing Ingredient Penalty
        final_score = sim_score + (match_count * 1.5) - (missing_count * 0.5)
        
        match_scores.append((idx, final_score))

    # Sort by highest match score
    ranked_recipes = sorted(match_scores, key=lambda x: x[1], reverse=True)[:top_n]
    
    # Return top-ranked recipes
    return df.iloc[[idx for idx, _ in ranked_recipes]][["name", "ingredients", "instructions"]]

# Example Usage
user_ingredients = ["tomato", "garlic", "milk"]  # Even if milk is missing, it can use almond milk!
recommended_recipes = search_recipes_with_substitutes(user_ingredients)
print(recommended_recipes)


                                                          name  \
1504  Paal Kozhukattai Recipe With Bananas - With Vegan Option   
3624                 Beetroot Halwa Recipe (With Vegan Option)   
1595          Matar Masala Recipe - Peas In Onion Tomato Gravy   
3607        Brown Rice Flakes Pudding / Desi Poha Kheer Recipe   
3619                            Kerala Style Rice Halwa Recipe   

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   ingredients  \
1504                                                     

In [23]:
recommended_recipes.to_csv("recommended_recipes.csv", index=False)
print("Results saved! Open 'recommended_recipes.csv' to view full output.")


Results saved! Open 'recommended_recipes.csv' to view full output.


In [5]:
print("Total Recipes in Dataset:", len(df))


Total Recipes in Dataset: 4235


In [21]:
ingredient_substitutes = {
    "butter": ["margarine", "coconut oil"],
    "cream": ["yogurt", "coconut milk"],
    "garlic": ["garlic powder", "onion"],
    "sugar": ["honey", "stevia"],
    "tomato": ["tomato paste", "sun-dried tomatoes"],
    "milk": ["almond milk", "coconut milk", "soy milk"]
}


In [24]:
pd.set_option("display.max_colwidth", None)  # Show full text in columns
pd.set_option("display.max_rows", 100)  # Show more rows
pd.set_option("display.max_columns", None)  # Show all columns

# Print expanded results
print(recommended_recipes)


                                                          name  \
1504  Paal Kozhukattai Recipe With Bananas - With Vegan Option   
3624                 Beetroot Halwa Recipe (With Vegan Option)   
1595          Matar Masala Recipe - Peas In Onion Tomato Gravy   
3607        Brown Rice Flakes Pudding / Desi Poha Kheer Recipe   
3619                            Kerala Style Rice Halwa Recipe   

                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   ingredients  \
1504                                                     

In [13]:
print("Total Recipes Before Removing Duplicates:", len(df))

# Check for duplicate recipes based on name & ingredients
duplicate_rows = df[df.duplicated(subset=["name", "ingredients"], keep=False)]
print("Number of Duplicate Entries:", len(duplicate_rows))

# Display duplicates
print(duplicate_rows)


Total Recipes Before Removing Duplicates: 12705
Number of Duplicate Entries: 12705
                                                                 name  \
0                                  Thayir Semiya Recipe (Curd Semiya)   
1        Chettinad Style Kara Kuzhambu Recipe with Potato and Brinjal   
2                            Goan Chana Ros Recipe (White Peas Curry)   
3                               Minced Meat And Egg Croquettes Recipe   
4                                                Thekera Tenga Recipe   
...                                                               ...   
12700         Stir Fry Green beans and Tofu with Panch Phoron Recipe    
12701                                Dhuska Recipe (Rice Fried Bread)   
12702               Khatta Meetha Petha Recipe (Yellow Pumpkin Sabzi)   
12703  Patta Gobi Matar Nu Shaak Recipe (Cabbage & Peas Sabzi Recipe)   
12704              Kerala Pumpkin Pachadi Recipe (Parangikai Pachadi)   

                                        

In [14]:
collection.delete_many({})
print("✅ Removed all recipes from MongoDB!")


✅ Removed all recipes from MongoDB!


In [15]:
recipes_cleaned = df.to_dict(orient="records")
collection.insert_many(recipes_cleaned)
print("✅ Inserted cleaned recipes into MongoDB!")


✅ Inserted cleaned recipes into MongoDB!


In [16]:
recipe_count = collection.count_documents({})
print("✅ Total Recipes in MongoDB After Cleaning:", recipe_count)


✅ Total Recipes in MongoDB After Cleaning: 12705


In [17]:
# Delete ALL recipes from MongoDB
collection.delete_many({})

# Verify that the database is now empty
recipe_count = collection.count_documents({})
print("✅ All recipes deleted! Current recipe count in MongoDB:", recipe_count)


✅ All recipes deleted! Current recipe count in MongoDB: 0


In [1]:
import pandas as pd
import pymongo

# Connect to MongoDB
client = pymongo.MongoClient("mongodb://localhost:27017/")
db = client["recipe_db"]
collection = db["recipes"]

# Load dataset
recipes = list(collection.find({}, {"_id": 0}))  # Get all fields
df = pd.DataFrame(recipes)

# Show all column names
print("Available Filters:", df.columns)


Available Filters: Index(['name', 'image_url', 'description', 'cuisine', 'course', 'diet',
       'prep_time', 'ingredients', 'instructions'],
      dtype='object')


In [2]:
# Show unique values for each filter
print("Unique Cuisines:", df["cuisine"].dropna().unique())
print("Unique Courses:", df["course"].dropna().unique())
print("Unique Diets:", df["diet"].dropna().unique())


Unique Cuisines: ['Indian' 'South Indian Recipes' 'Goan Recipes' 'North Indian Recipes'
 'Assamese' 'Tamil Nadu' 'Karnataka' 'Malvani' 'Kerala Recipes'
 'Gujarati Recipes\ufeff' 'Sindhi' 'Konkan' 'Hyderabadi' 'Andhra' 'Coorg'
 'Bengali Recipes' 'Mangalorean' 'Kongunadu' 'Punjabi' 'Udupi' 'Pakistani'
 'Chettinad' 'Rajasthani' 'Nepalese' 'Coastal Karnataka' 'Indo Chinese'
 'Parsi Recipes' 'Kashmiri' 'South Karnataka' 'Mughlai' 'Asian' 'Bihari'
 'Maharashtrian Recipes' 'North East India Recipes' 'Himachal'
 'Uttar Pradesh' 'Awadhi' 'Sichuan' 'North Karnataka' 'Middle Eastern'
 'Oriya Recipes' 'Uttarakhand-North Kumaon ' 'Lucknowi' 'Chinese'
 'Haryana' 'Fusion' 'Unknown' 'Malabar' 'Continental' 'Jharkhand'
 'Sri Lankan' 'Thai' 'Arab' 'Nagaland' 'Afghan' 'African']
Unique Courses: ['Lunch' 'Appetizer' 'Dinner' 'Side Dish' 'South Indian Breakfast'
 'Indian Breakfast' 'Main Course' 'North Indian Breakfast' 'One Pot Dish'
 'World Breakfast' 'Dessert' 'Unknown' 'Snack' 'Brunch']
Unique Diets: [

In [3]:
# Check if any ingredients are empty, None, or wrongly formatted
invalid_rows = df[(df["ingredients"].isna()) | (df["ingredients"] == "[]") | (df["ingredients"] == " ") | (df["ingredients"] == "None")]
print("🔍 Found Invalid Rows:", len(invalid_rows))
print(invalid_rows.head())


🔍 Found Invalid Rows: 0
Empty DataFrame
Columns: [name, image_url, description, cuisine, course, diet, prep_time, ingredients, instructions]
Index: []


In [1]:
df.info()

NameError: name 'df' is not defined