This part of the notebook extracts the ID of each ingredient in our Firebase database and maps it to the ingredient name

In [None]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import json

# Path to Firebase credentials and the output JSON file for the ingredient ID mapping
cred_path = './firebase_credentials.json'
output_json_path = './ingredient_id_mapping.json'

# Initialize Firebase only once. initialize_app() raises ValueError when the
# default app already exists, which happens whenever this cell is re-run in
# the same kernel; in that case the existing app is reused as-is.
cred = credentials.Certificate(cred_path)
try:
    firebase_admin.get_app()
except ValueError:
    # No default app yet: create it with the service-account credentials
    firebase_admin.initialize_app(cred)

# Firestore client (uses the default app)
db = firestore.client()

def extract_and_save_ingredient_ids():
    """Export a ``{document ID: ingredient name}`` mapping to a JSON file.

    Streams every document of the Firestore ``ingredients`` collection through
    the module-level ``db`` client and writes the mapping to
    ``output_json_path``. Documents without a non-blank string ``name`` field
    are left out of the file and listed in a printed summary, so the mapping
    never contains ``null`` names that would fail when lower-cased later.

    Returns:
        None. The result is the JSON file plus the printed status messages.
    """
    ingredient_id_map = {}
    skipped_ids = []

    # Retrieve documents from the 'ingredients' collection
    for ingredient in db.collection('ingredients').stream():
        # Guard against an empty snapshot as well as a missing 'name' field
        name = (ingredient.to_dict() or {}).get('name')
        if isinstance(name, str) and name.strip():
            # Map the document ID to the ingredient name
            ingredient_id_map[ingredient.id] = name
        else:
            skipped_ids.append(ingredient.id)

    # Write the ID mapping to a JSON file
    with open(output_json_path, 'w', encoding='utf-8') as json_file:
        json.dump(ingredient_id_map, json_file, indent=4)

    print("Ingredient IDs have been mapped and saved to JSON.")
    if skipped_ids:
        print(f"Skipped {len(skipped_ids)} document(s) without a usable name: {skipped_ids}")

# Run the export: reads the 'ingredients' collection and writes the
# ID -> name mapping to the file named by output_json_path
extract_and_save_ingredient_ids()

This part of the notebook adds to each recipe a list of curated ingredients (ingredient names only, without the measurements), found by matching the recipe's ingredient lines against the list of all curated ingredients

In [None]:
import json

# Load the curated ingredients list
# (JSON text is UTF-8, so decode explicitly instead of relying on the platform default)
with open('./final_ingredients_sorted.json', 'r', encoding='utf-8') as file:
    curated_ingredients = json.load(file)

# Lower-cased lookup set. Blank entries are dropped: an empty or
# whitespace-only name is a substring of practically every ingredient line
# and would therefore be "found" in every recipe.
curated_ingredients_set = {
    ingredient.lower() for ingredient in curated_ingredients if ingredient.strip()
}

# Load the recipes
with open('./recipes.json', 'r', encoding='utf-8') as file:
    recipes = json.load(file)

# Helper function to normalize text
def normalize(text):
    """Return ``text`` lower-cased with common punctuation stripped.

    Commas, periods and parentheses are removed; hyphens become spaces.
    """
    cleaned = text.lower()
    # Drop the punctuation marks that carry no meaning for matching
    for mark in ',.()':
        cleaned = cleaned.replace(mark, '')
    # Hyphenated words are split into separate words
    return cleaned.replace('-', ' ')

# Function to find and add curated ingredients
def add_curated_ingredients(recipe, curated_ingredients_set):
    """Attach the curated ingredients mentioned in a recipe, in place.

    Every entry of ``recipe['ingredients']`` is normalized, and each name in
    ``curated_ingredients_set`` that occurs in it as a substring is collected.
    The matches are stored under ``recipe['curated_ingredients']`` as a sorted
    list, so the saved output is identical from run to run (plain set
    iteration order for strings can change between interpreter runs). When
    nothing matches, the list holds the single placeholder
    "No curated ingredients found".

    Args:
        recipe: Recipe dict; its 'ingredients' entry, if present, is an
            iterable of ingredient strings.
        curated_ingredients_set: Curated ingredient names; they are compared
            against lower-cased text, so they are expected to be lower-case.

    Returns:
        None. ``recipe`` is modified in place.
    """
    found_ingredients = set()
    # A recipe without an 'ingredients' entry is treated as having none
    for ingredient in recipe.get('ingredients') or []:
        normalized_ingredient = normalize(ingredient)
        # Check for the presence of any curated ingredient as a substring of the noisy ingredient
        found_ingredients.update(
            curated for curated in curated_ingredients_set
            if curated in normalized_ingredient
        )

    # Add found curated ingredients to the recipe (placeholder when none were found)
    recipe['curated_ingredients'] = sorted(found_ingredients) or ["No curated ingredients found"]

# Process each recipe (only the recipe dicts are needed here, not their keys)
for recipe in recipes.values():
    add_curated_ingredients(recipe, curated_ingredients_set)

# Save the modified recipes back to a new JSON file
with open('./updated_recipes_with_curated_ingredients.json', 'w', encoding='utf-8') as file:
    json.dump(recipes, file, indent=4)

print("Recipes have been updated with curated ingredients.")

This part of the notebook replaces, in each recipe, the curated ingredient names with their corresponding ingredient IDs from the mapping; names that have no entry in the mapping are kept as they are

In [None]:
import json

# Load the JSON files
with open('./updated_recipes_with_curated_ingredients.json', 'r', encoding='utf-8') as file:
    recipes = json.load(file)

with open('./ingredient_id_mapping.json', 'r', encoding='utf-8') as file:
    ingredient_mapping = json.load(file)

# Create a reverse mapping from ingredient name to ID.
# - Entries whose name is not a string (e.g. null) are skipped instead of
#   raising AttributeError on .lower().
# - Keys are stripped as well as lower-cased, because the lookup strips the
#   names it searches for.
# - The loop variable is not called `id`, so the builtin is not shadowed.
name_to_id = {
    name.strip().lower(): ingredient_id
    for ingredient_id, name in ingredient_mapping.items()
    if isinstance(name, str)
}

# Function to replace ingredient names with IDs in a recipe
def replace_ingredient_names(ingredients, mapping):
    """Translate ingredient names to IDs, keeping names that have no ID.

    Args:
        ingredients: Iterable of ingredient name strings.
        mapping: Dict from ingredient name to ingredient ID.

    Returns:
        A new list, in the input order, holding ``mapping[name]`` for each
        whitespace-stripped name found in ``mapping`` and the stripped name
        itself otherwise.
    """
    stripped_names = (entry.strip() for entry in ingredients)
    # Fall back to the name itself when the mapping has no ID for it
    return [mapping.get(label, label) for label in stripped_names]

# Destination of the recipes whose curated ingredients are expressed as IDs
output_path = './updated_recipes_with_ids.json'

# Apply the function to all recipes: each recipe dict is updated in place and
# collected, under its original key, in updated_recipes
updated_recipes = dict(recipes)
for recipe in updated_recipes.values():
    recipe['curated_ingredients'] = replace_ingredient_names(
        recipe['curated_ingredients'], name_to_id
    )

# Save the updated recipes to a new JSON file
with open(output_path, 'w') as file:
    json.dump(updated_recipes, file, indent=4)

# Output the path to the new file (shown as the cell result)
output_path