In [1]:
import re

# Multiplier applied to a parsed amount for each recognised measuring unit.
# The scaled value is labelled afterwards by get_ingredient_type.
conversion_factors = {
    'cup': 240,   # one cup
    'tbsp': 15,   # one tablespoon
    'tsp': 5,     # one teaspoon
}

def get_ingredient_type(ingredient_name):
    """Choose the unit label that goes with an ingredient name.

    Args:
        ingredient_name: Ingredient name as a string.

    Returns:
        'ml' if the name contains any of the liquid keywords as a substring,
        None if the name is exactly 'egg' (counted, not measured),
        and 'g' for everything else.
    """
    liquid_keywords = ('milk', 'oil', 'water', 'butter', 'extract')
    for keyword in liquid_keywords:
        # Substring test: 'peanut butter' and 'coconut oil' both count as liquids.
        if keyword in ingredient_name:
            return 'ml'
    return None if ingredient_name == 'egg' else 'g'



# Parse a recipe line into its amount, measuring unit and ingredient name.
def _parse_number(text):
    """Turn a matched amount such as '3/4', '0.5' or '2' into a float."""
    if '/' in text:
        numerator, denominator = text.split('/')
        # Plain arithmetic instead of eval(): no code is executed on text that
        # comes from the input file, and operands with leading zeros ("01/02")
        # parse instead of raising SyntaxError.
        return int(numerator) / int(denominator)
    return float(text)


def _match_unit(token):
    """Return the conversion_factors key a token stands for, or None."""
    word = token.lower().rstrip('.,')
    if word in conversion_factors:
        return word
    # Accept simple plurals such as "cups" or "tbsps".
    if word.endswith('s') and word[:-1] in conversion_factors:
        return word[:-1]
    return None


def convert_to_standard_units(ingredient_str):
    """Parse one recipe line into a name, a scaled amount and a unit label.

    The line is expected to look like "<amount> [<unit>] <name ...>", for
    example "3/4 cup white flour", "1 1/2 cups milk" or "2 egg".

    Args:
        ingredient_str: A single ingredient line.

    Returns:
        A dict with three keys:
          'ingredient': the words left after the quantity and unit tokens;
          'amount': the parsed amount as a float, multiplied by the matching
              conversion_factors entry when a unit was recognised (1.0 is
              assumed when the line contains no number);
          'units': whatever get_ingredient_type returns for the name.

    Raises:
        ZeroDivisionError: if the amount is a fraction with a zero denominator.
    """
    tokens = ingredient_str.split()

    # The amount is the first integer, decimal or simple fraction in the line.
    amount_match = re.search(r'(\d+\/\d+|\d+\.\d+|\d+)', ingredient_str)
    amount = _parse_number(amount_match.group(1)) if amount_match else 1.0

    # By default the first token is the quantity slot, numeric or a word such as "pinch".
    quantity_tokens = 1
    if (len(tokens) >= 2
            and re.fullmatch(r'\d+', tokens[0])
            and re.fullmatch(r'\d+/\d+', tokens[1])):
        # Mixed number ("1 1/2"): both tokens belong to the amount.
        amount = int(tokens[0]) + _parse_number(tokens[1])
        quantity_tokens = 2
    elif len(tokens) == 1 and amount_match is None:
        # A lone word such as "salt" is the ingredient itself, not a quantity.
        quantity_tokens = 0

    # Only the token right after the quantity may be a unit. Matching the whole
    # token keeps names that merely contain a unit ("cupcake") from being scaled.
    measurement = None
    if len(tokens) > quantity_tokens:
        measurement = _match_unit(tokens[quantity_tokens])

    name_start = quantity_tokens + (1 if measurement else 0)
    ingredient_name = ' '.join(tokens[name_start:])

    # Scale by the unit's factor; without a unit the raw amount is kept.
    if measurement:
        units = amount * conversion_factors[measurement]
    else:
        units = amount

    return {
        'ingredient': ingredient_name,
        'amount': units,
        'units': get_ingredient_type(ingredient_name),  # The fixed unit system
    }

# # Example usage for a solid ingredient
# ingredient_str = "3/4 cup white flour"
# result = convert_to_standard_units(ingredient_str)
# print(result)

# # Example usage for a liquid ingredient (but treated the same)
# ingredient_str_liquid = "1/2 cup milk"
# result_liquid = convert_to_standard_units(ingredient_str_liquid)
# print(result_liquid)
# Quick check: a count-only line ("2 egg") has no measuring unit, so the amount
# is passed through unscaled and the unit label is None (see the output below).
convert_to_standard_units("2 egg")


{'ingredient': 'egg', 'amount': 2.0, 'units': None}

In [2]:
import json
# Load the recipes; each entry must provide an 'ingredients' list of lines
# that convert_to_standard_units can parse.
with open('recipes.json') as f:
    recipes_json = json.load(f)

# Distinct ingredient names produced by the parser across all recipes.
# (The misspelt name is kept because later cells refer to it.)
unique_ingedients = set()

for recipe in recipes_json:
    for line in recipe['ingredients']:
        # Parse each line once and reuse the result.
        converted = convert_to_standard_units(line)
        unique_ingedients.add(converted['ingredient'])
        # An empty name means the line could not be split into quantity and
        # name; print it so the source data can be inspected.
        if converted['ingredient'] == '':
            print(converted)

In [3]:
# Check that the parser produced a bare 'egg' ingredient name.
"egg" in unique_ingedients

True

In [4]:
# Number of distinct ingredient names collected from all recipes.
len(unique_ingedients)

59

In [5]:
# Display the full set of distinct ingredient names.
unique_ingedients

{'almond butter',
 'almond flour',
 'applesauce',
 'baking powder',
 'baking soda',
 'brown sugar',
 'butter',
 'canned pumpkin',
 'chocolate bar of choice',
 'chocolate chips',
 'chopped nuts',
 'cinnamon',
 'cocoa powder',
 'coconut flour',
 'coconut oil',
 'coconut sugar',
 'cream cheese',
 'dutch cocoa powder',
 'egg',
 'finely chopped almonds',
 'finely chopped walnuts',
 'finely ground almond flour',
 'freeze-dried raspberries',
 'hazelnuts',
 'honey',
 'lemon juice',
 'maple syrup',
 'mashed banana',
 'melted coconut oil',
 'melted vegan butter',
 'milk of choice',
 'nut butter',
 'oat flour',
 'oats',
 'oil',
 'peanut butter',
 'powdered erythritol',
 'powdered sugar',
 'protein powder',
 'pumpkin pie spice',
 'pure maple syrup',
 'pure peppermint extract',
 'pure vanilla',
 'pure vanilla extract',
 'quick oats',
 'regular sugar',
 'rolled oats',
 'salt',
 'salted butter',
 'shredded coconut',
 'spelt flour',
 'sugar',
 'tahini',
 'uncut stevia',
 'unsweetened shredded coconut',
 'vegan butter',
 'vegetable oil',
 'white flour',
 'white sugar'}

In [6]:
# Health and taste scores per ingredient, keyed by the exact name string that
# convert_to_standard_units returns under 'ingredient'. The recipe-expansion
# loop indexes this table directly, so a name without an entry raises KeyError.
# NOTE(review): the values here span 3-10, presumably a 1-10 scale — confirm.
ingredient_scores = {
    'pure peppermint extract': {"health": 6, "taste": 7},
    'almond butter': {"health": 8, "taste": 8},
    'almond flour': {"health": 9, "taste": 7},
    'applesauce': {"health": 8, "taste": 7},
    'baking powder': {"health": 3, "taste": 5},
    'baking soda': {"health": 3, "taste": 4},
    'brown sugar': {"health": 4, "taste": 8},
    'butter': {"health": 4, "taste": 9},
    'canned pumpkin': {"health": 9, "taste": 6},
    'chocolate bar of choice': {"health": 6, "taste": 10},
    'chocolate chips': {"health": 5, "taste": 10},
    'chopped nuts': {"health": 8, "taste": 7},
    'cinnamon': {"health": 7, "taste": 8},
    'cocoa powder': {"health": 8, "taste": 8},
    'coconut flour': {"health": 8, "taste": 6},
    'coconut oil': {"health": 6, "taste": 9},
    'coconut sugar': {"health": 7, "taste": 7},
    'cream cheese': {"health": 4, "taste": 8},
    'dutch cocoa powder': {"health": 8, "taste": 8},
    'egg': {"health": 8, "taste": 7},
    'finely chopped almonds': {"health": 8, "taste": 7},
    'finely chopped walnuts': {"health": 8, "taste": 7},
    'finely ground almond flour': {"health": 9, "taste": 7},
    'freeze-dried raspberries': {"health": 9, "taste": 6},
    'hazelnuts': {"health": 8, "taste": 7},
    'honey': {"health": 6, "taste": 8},
    'lemon juice': {"health": 7, "taste": 6},
    'maple syrup': {"health": 6, "taste": 9},
    'mashed banana': {"health": 9, "taste": 7},
    'melted coconut oil': {"health": 6, "taste": 9},
    'melted vegan butter': {"health": 5, "taste": 8},
    'milk of choice': {"health": 6, "taste": 7},
    'nut butter': {"health": 7, "taste": 8},
    'oat flour': {"health": 8, "taste": 7},
    'oats': {"health": 9, "taste": 7},
    'oil': {"health": 5, "taste": 9},
    'peanut butter': {"health": 7, "taste": 9},
    'powdered erythritol': {"health": 5, "taste": 5},
    'powdered sugar': {"health": 3, "taste": 8},
    'protein powder': {"health": 7, "taste": 6},
    'pumpkin pie spice': {"health": 6, "taste": 8},
    'pure maple syrup': {"health": 6, "taste": 9},
    'pure vanilla': {"health": 5, "taste": 7},
    'pure vanilla extract': {"health": 5, "taste": 7},
    'quick oats': {"health": 9, "taste": 7},
    'regular sugar': {"health": 3, "taste": 9},
    'rolled oats': {"health": 9, "taste": 7},
    'salt': {"health": 3, "taste": 9},
    'salted butter': {"health": 4, "taste": 9},
    'shredded coconut': {"health": 7, "taste": 8},
    'spelt flour': {"health": 8, "taste": 7},
    'sugar': {"health": 3, "taste": 9},
    'tahini': {"health": 8, "taste": 7},
    'uncut stevia': {"health": 6, "taste": 5},
    'unsweetened shredded coconut': {"health": 7, "taste": 8},
    'vegan butter': {"health": 5, "taste": 8},
    'vegetable oil': {"health": 5, "taste": 9},
    'white flour': {"health": 3, "taste": 7},
    'white sugar': {"health": 3, "taste": 9}
}

In [7]:
# Rebuild every recipe with parsed ingredients that also carry their scores.
new_recipes = []

for recipe in recipes_json:
    new_ingredients = []
    for ingredient in recipe['ingredients']:
        converted_ingredient = convert_to_standard_units(ingredient)
        # A name that is missing from ingredient_scores raises KeyError here.
        scores = ingredient_scores[converted_ingredient['ingredient']]
        converted_ingredient.update(health=scores['health'], taste=scores['taste'])
        new_ingredients.append(converted_ingredient)

    # Copy the identifying fields unchanged, then attach the enriched ingredients.
    new_recipe = {field: recipe[field] for field in ('index', 'name', 'url')}
    new_recipe['ingredients'] = new_ingredients
    new_recipes.append(new_recipe)

# Write the enriched recipes to recipes_expanded.json.
with open('recipes_expanded.json', 'w') as f:
    json.dump(new_recipes, f, indent=2)