In [10]:
import spacy
from typing import List, Optional
from pydantic import BaseModel

In [None]:
class Ingredient(BaseModel):
    name: str
    description: str
    quantity: float
    measurement_unit: str

class Product(BaseModel):
    id: str
    name: str
    image_url: str
    quantity: float
    measurement_unit: str
    description: str
    category: str
    price: float
    reviews: Optional[int]
    rating: Optional[int]

class Recipe(BaseModel):
    id: str
    name: str
    image_url: str
    category: str
    steps: List[str]
    ingredients: List[Ingredient]
    reviews: Optional[int]
    rating: Optional[int]

In [None]:
# Example usage
recipes = [
    Recipe(
        id="1",
        name="Chocolate Cake",
        image_url="cake.jpg",
        category="dessert",
        steps=["Step 1", "Step 2", "Step 3"],
        ingredients=[
            Ingredient(name="făină", description="Făină albă", quantity=300, measurement_unit= "g"),
            Ingredient(name="zahăr", description="Zahăr granulat", quantity=200, measurement_unit= 'g'),
            Ingredient(name="cacao", description="Cacao neîndulcită", quantity=100, measurement_unit= 'g'),
        ],
        reviews=10,
        rating=4
    ),
    Recipe(
        id="2",
        name="Vegetable Stir-Fry",
        image_url="stir_fry.jpg",
        category="vegetarian",
        steps=["Step 1", "Step 2", "Step 3"],
        ingredients=[
            Ingredient(name="broccoli", description="Broccoli proaspăt", quantity=2, measurement_unit= 'buc'),
            Ingredient(name="morcovi", description="Morcovi feliați", quantity=2, measurement_unit= 'buc'),
            Ingredient(name="tofu", description="Tofu tare, cuburi", quantity=50, measurement_unit= 'g'),
        ],
        reviews=5,
        rating=3
    )
]

products = [
    Product(
        id="1",
        name="făină albă",
        image_url="flour.jpg",
        quantity=1000,
        measurement_unit="g",
        description="Făină versatilă pentru copt și gătit",
        category="păntry",
        price=2.99,
        reviews=100,
        rating=4
    ),
    Product(
        id="2",
        name="zahăr granulat",
        image_url="sugar.jpg",
        quantity=500,
        measurement_unit="g",
        description="Îndulcitor comun pentru băuturi și deserturi",
        category="păntry",
        price=5.5,
        reviews=100,
        rating=4
    ), 
    Product(
        id="3",
        name="cacao neîndulcită",
        image_url="cocoa.jpg",
        quantity="200",
        measurement_unit="g",
        description="Cacao naturală pentru prăjituri și băuturi calde",
        category="păntry",
        price=3.49,
        reviews=80,
        rating=4
    ),
    Product(
        id="4",
        name="ulei de măsline extra virgin",
        image_url="olive_oil.jpg",
        quantity="500",
        measurement_unit="ml",
        description="Ulei de măsline presat la rece, de calitate superioară",
        category="păntry",
        price=7.99,
        reviews=120,
        rating=5
    ),
    Product(
        id="5",
        name="ciocolată neagră 70%",
        image_url="dark_chocolate.jpg",
        quantity="100",
        measurement_unit="g",
        description="Ciocolată neagră cu conținut ridicat de cacao",
        category="dulciuri",
        price=2.49,
        reviews=90,
        rating=4
    ),
    Product(
        id="6",
        name="ardei gras roșu",
        image_url="red_pepper.jpg",
        quantity="1",
        measurement_unit="buc",
        description="Ardei gras roșu, proaspăt și aromat",
        category="fructe & legume",
        price=1.29,
        reviews=50,
        rating=4
    ),
    Product(
        id="7",
        name="zahăr pudra",
        image_url="sugar.jpg",
        quantity="300",
        measurement_unit="g",
        description="Îndulcitor comun pentru băuturi și deserturi",
        category="păntry",
        price=5.5,
        reviews=100,
        rating=4
    ),
]


In [37]:
def find_matching_ingredients(recipe: Recipe, products: List[Product], similarity_threshold: float = 0.7):
    nlp = spacy.load("ro_core_news_md")
    
    matching_ingredients = []
    
    for recipe_ingredient in recipe.ingredients:
        recipe_ingredient_tokens = nlp(recipe_ingredient.name)
        recipe_ingredient_matches = []
        
        for product in products:
            product_tokens = nlp(product.name)
            
            similarity = recipe_ingredient_tokens.similarity(product_tokens)
            
            if similarity >= similarity_threshold:
                recipe_ingredient_matches.append((product, similarity))
        
        recipe_ingredient_matches = sorted(recipe_ingredient_matches, key=lambda x: x[1], reverse=True)
        
        if len(recipe_ingredient_matches) > 0:
            closest_quantity_product = None
            closest_quantity_diff = float('inf')
            
            for product, similarity in recipe_ingredient_matches:
                if product.quantity > recipe_ingredient.quantity:
                    quantity_diff = abs(product.quantity - recipe_ingredient.quantity)
                    
                    if quantity_diff < closest_quantity_diff:
                        closest_quantity_product = product
                        closest_quantity_diff = quantity_diff
            
            if closest_quantity_product is not None:
                matching_ingredients.append((recipe_ingredient, closest_quantity_product))
    
    return matching_ingredients

In [126]:
## Aggregated list of recipes

import spacy
from typing import List, Optional, Dict
from pydantic import BaseModel

class Ingredient(BaseModel):
    name: str
    description: str
    quantity: float
    measurement_unit: str

class Product(BaseModel):
    id: str
    name: str
    image_url: str
    quantity: float
    measurement_unit: str
    description: str
    category: str
    price: float
    reviews: Optional[int]
    rating: Optional[int]

class Recipe(BaseModel):
    id: str
    name: str
    image_url: str
    category: str
    steps: List[str]
    ingredients: List[Ingredient]
    reviews: Optional[int]
    rating: Optional[int]




def aggregate_ingredients(recipes: List[Recipe], products: List[Product], similarity_threshold: float = 0.3) -> Dict[str, Product]:
    nlp = spacy.load("ro_core_news_md")
    aggregated_ingredients = {}
    aggregated_ingredient_names = set()
    
    for recipe in recipes:
        for recipe_ingredient in recipe.ingredients:
            recipe_ingredient_tokens = nlp(recipe_ingredient.name)
            existing_ingredient = aggregated_ingredients.get(recipe_ingredient.name)
            
            if existing_ingredient:
                # Update existing ingredient with aggregated quantity
                existing_ingredient.quantity += recipe_ingredient.quantity
            else:
                # Add new ingredient to aggregated list
                aggregated_ingredients[recipe_ingredient.name] = recipe_ingredient
    
    matching_products = {}
    
    for ingredient_name, ingredient in aggregated_ingredients.items():
        ingredient_tokens = nlp(ingredient_name)
        recipe_ingredient_matches = []
        
        for product in products:
            product_tokens = nlp(product.name)
            similarity = ingredient_tokens.similarity(product_tokens)
            
            if similarity >= similarity_threshold:
                recipe_ingredient_matches.append((product, similarity)) #, abs(product.quantity - ingredient.quantity)
        
        if recipe_ingredient_matches:
            recipe_ingredient_matches = sorted(recipe_ingredient_matches, key=lambda x: x[1], reverse=True)  #lambda x: (x[1], x[2])
            
            matching_products[ingredient_name] = recipe_ingredient_matches[0][0]
    
    return matching_products


# Example usage
recipes = [
    Recipe(
        id="1",
        name="Chocolate Cake",
        image_url="cake.jpg",
        category="dessert",
        steps=["Step 1", "Step 2", "Step 3"],
        ingredients=[
            Ingredient(name="făină", description="Făină albă", quantity=300, measurement_unit="g"),
            Ingredient(name="zahăr", description="Zahăr granulat", quantity=100, measurement_unit="g"),
            Ingredient(name="cacao", description="Cacao neîndulcită", quantity=100, measurement_unit="g"),
        ],
        reviews=10,
        rating=4
    ),
    Recipe(
        id="2",
        name="Vanilla cake",
        image_url="vanilla.jpg",
        category="dessert",
        steps=["Step 1", "Step 2", "Step 3"],
        ingredients=[
            Ingredient(name="broccoli", description="Broccoli", quantity=10, measurement_unit="buc"),
            Ingredient(name="zahăr", description="Zahăr granulat", quantity=100, measurement_unit="g"),
            Ingredient(name="cacao", description="Cacao neîndulcită", quantity=200, measurement_unit="g"),
        ], 
        reviews = 15, 
        rating = 4.5 
    )
]

products = [
    Product(
        id="1",
        name="făină albă",
        image_url="flour.jpg",
        quantity=1000,
        measurement_unit="g",
        description="Făină versatilă pentru copt și gătit",
        category="păntry",
        price=2.99,
        reviews=100,
        rating=4
    ),
    Product(
        id="2",
        name="zahăr granulat",
        image_url="sugar.jpg",
        quantity=500,
        measurement_unit="g",
        description="Îndulcitor comun pentru băuturi și deserturi",
        category="păntry",
        price=5.5,
        reviews=100,
        rating=4
    ), 
    Product(
        id="3",
        name="cacao neîndulcită",
        image_url="cacao.jpg",
        quantity=200,
        measurement_unit="g",
        description="Cacao naturală pentru prăjituri și băuturi calde",
        category="păntry",
        price=3.49,
        reviews=80,
        rating=4
    ),
    Product(
        id="4",
        name="ulei de măsline extra virgin",
        image_url="olive_oil.jpg",
        quantity=500,
        measurement_unit="ml",
        description="Ulei de măsline presat la rece, de calitate superioară",
        category="păntry",
        price=7.99,
        reviews=120,
        rating=5
    ),
    Product(
        id="5",
        name="ciocolată neagră 70%",
        image_url="dark_chocolate.jpg",
        quantity=100,
        measurement_unit="g",
        description="Ciocolată neagră cu conținut ridicat de cacao",
        category="dulciuri",
        price=2.49,
        reviews=90,
        rating=4
    ),
    Product(
        id="6",
        name="ardei gras roșu",
        image_url="red_pepper.jpg",
        quantity=1,
        measurement_unit="buc",
        description="Ardei gras roșu, proaspăt și aromat",
        category="fructe & legume",
        price=1.29,
        reviews=50,
        rating=4
    ),
    Product(
        id="7",
        name="zahăr pudra",
        image_url="sugar.jpg",
        quantity=300,
        measurement_unit="g",
        description="Îndulcitor comun pentru băuturi și deserturi",
        category="păntry",
        price=5.5,
        reviews=100,
        rating=4
    ),
    Product(
        id="8",
        name="broccoli",
        image_url="broccoli.jpg",
        quantity=5,
        measurement_unit="buc",
        description="Îndulcitor comun pentru băuturi și deserturi",
        category="păntry",
        price=5.5,
        reviews=100,
        rating=4
    ),
]


In [130]:
## Correct method, but Product class should have multiplier column added when defining the class

import spacy
from typing import List, Optional, Dict
from pydantic import BaseModel

class Ingredient(BaseModel):
    name: str
    description: str
    quantity: float
    measurement_unit: str

class Product(BaseModel):
    id: str
    name: str
    image_url: str
    quantity: float
    measurement_unit: str
    description: str
    category: str
    price: float
    reviews: Optional[int]
    rating: Optional[int]
    multiplier: Optional[int]  # New column for multiplier

class Recipe(BaseModel):
    id: str
    name: str
    image_url: str
    category: str
    steps: List[str]
    ingredients: List[Ingredient]
    reviews: Optional[int]
    rating: Optional[int]


def aggregate_ingredients(recipes: List[Recipe], products: List[Product], similarity_threshold: float = 0.3) -> Dict[str, Product]:
    nlp = spacy.load("ro_core_news_md")
    aggregated_ingredients = {}
    aggregated_ingredient_names = set()

    for recipe in recipes:
        for recipe_ingredient in recipe.ingredients:
            recipe_ingredient_tokens = nlp(recipe_ingredient.name)
            existing_ingredient = aggregated_ingredients.get(recipe_ingredient.name)

            if existing_ingredient:
                # Update existing ingredient with aggregated quantity
                existing_ingredient.quantity += recipe_ingredient.quantity
            else:
                # Add new ingredient to aggregated list
                aggregated_ingredients[recipe_ingredient.name] = recipe_ingredient

    matching_products = {}

    for ingredient_name, ingredient in aggregated_ingredients.items():
        ingredient_tokens = nlp(ingredient_name)
        recipe_ingredient_matches = []

        for product in products:
            product_tokens = nlp(product.name)
            similarity = ingredient_tokens.similarity(product_tokens)

            if similarity >= similarity_threshold:
                recipe_ingredient_matches.append((product, similarity))

        if recipe_ingredient_matches:
            recipe_ingredient_matches = sorted(recipe_ingredient_matches, key=lambda x: x[1], reverse=True)
            best_match_product = recipe_ingredient_matches[0][0]
            multiplier = 1  # Default multiplier

            if best_match_product.quantity < ingredient.quantity:
                multiplier = int(ingredient.quantity / best_match_product.quantity)  # Calculate multiplier

            best_match_product.multiplier = multiplier  # Assign multiplier to the best match product
            matching_products[ingredient_name] = best_match_product

    return matching_products 

In [131]:
##Hackish method

import spacy
from typing import List, Optional, Dict
from pydantic import BaseModel

class Ingredient(BaseModel):
    name: str
    description: str
    quantity: float
    measurement_unit: str

class Product(BaseModel):
    id: str
    name: str
    image_url: str
    quantity: float
    measurement_unit: str
    description: str
    category: str
    price: float
    reviews: Optional[int]
    rating: Optional[int]

class MultiplierProduct(Product):
    multiplier: int

class Recipe(BaseModel):
    id: str
    name: str
    image_url: str
    category: str
    steps: List[str]
    ingredients: List[Ingredient]
    reviews: Optional[int]
    rating: Optional[int]

def aggregate_ingredients(recipes: List[Recipe], products: List[Product], similarity_threshold: float = 0.3) -> Dict[str, Product]:
    nlp = spacy.load("ro_core_news_md")
    aggregated_ingredients = {}
    aggregated_ingredient_names = set()

    for recipe in recipes:
        for recipe_ingredient in recipe.ingredients:
            recipe_ingredient_tokens = nlp(recipe_ingredient.name)
            existing_ingredient = aggregated_ingredients.get(recipe_ingredient.name)

            if existing_ingredient:
                # Update existing ingredient with aggregated quantity
                existing_ingredient.quantity += recipe_ingredient.quantity
            else:
                # Add new ingredient to aggregated list
                aggregated_ingredients[recipe_ingredient.name] = recipe_ingredient

    matching_products = {}

    for ingredient_name, ingredient in aggregated_ingredients.items():
        ingredient_tokens = nlp(ingredient_name)
        recipe_ingredient_matches = []

        for product in products:
            product_tokens = nlp(product.name)
            similarity = ingredient_tokens.similarity(product_tokens)

            if similarity >= similarity_threshold:
                recipe_ingredient_matches.append((product, similarity))

        if recipe_ingredient_matches:
            recipe_ingredient_matches = sorted(recipe_ingredient_matches, key=lambda x: x[1], reverse=True)
            best_match_product = recipe_ingredient_matches[0][0]
            multiplier = 1  # Default multiplier

            if isinstance(best_match_product, MultiplierProduct):
                best_match_product.multiplier = multiplier
            else:
                best_match_product = MultiplierProduct(**best_match_product.dict(), multiplier=multiplier)

            if best_match_product.quantity < ingredient.quantity:
                multiplier = int(ingredient.quantity / best_match_product.quantity)  # Calculate multiplier

            best_match_product.multiplier = multiplier  # Assign multiplier to the best match product
            matching_products[ingredient_name] = best_match_product

    return matching_products

In [129]:
aggregate_ingredients(recipes,products)

{'făină': MultiplierProduct(id='1', name='făină albă', image_url='flour.jpg', quantity=1000.0, measurement_unit='g', description='Făină versatilă pentru copt și gătit', category='păntry', price=2.99, reviews=100, rating=4, multiplier=1),
 'zahăr': MultiplierProduct(id='2', name='zahăr granulat', image_url='sugar.jpg', quantity=500.0, measurement_unit='g', description='Îndulcitor comun pentru băuturi și deserturi', category='păntry', price=5.5, reviews=100, rating=4, multiplier=1),
 'cacao': MultiplierProduct(id='3', name='cacao neîndulcită', image_url='cacao.jpg', quantity=200.0, measurement_unit='g', description='Cacao naturală pentru prăjituri și băuturi calde', category='păntry', price=3.49, reviews=80, rating=4, multiplier=2),
 'broccoli': MultiplierProduct(id='8', name='broccoli', image_url='broccoli.jpg', quantity=5.0, measurement_unit='buc', description='Îndulcitor comun pentru băuturi și deserturi', category='păntry', price=5.5, reviews=100, rating=4, multiplier=2)}