In [1]:
import pandas as pd
import re
import os

In [2]:
food_classes_df = pd.read_csv('food_classes.csv')
recipes_df = pd.read_csv('recipes.csv')

In [3]:
food_classes_df.head()

Unnamed: 0,ID,Name,Impact / kg,Parent ID
0,1,Beef Mince,,2.0
1,2,Beef,2.649402,3.0
2,3,Ruminant Meat,5.508226,4.0
3,4,Meat,1.310414,
4,5,Pork Mince,,6.0


In [4]:
recipes_df.head()

Unnamed: 0,Recipe ID,Recipe Name,Ingredient Name,Ingredient Weight / kg
0,1,Spaghetti Bolognese,beef mince,0.25
1,1,Spaghetti Bolognese,Pork Mince,0.25
2,1,Spaghetti Bolognese,Carrots,0.1
3,1,Spaghetti Bolognese,Onions,0.25
4,1,Spaghetti Bolognese,Celery,0.1


# Testing Normalise  Name Function

In [5]:
def normalize_name(name: str) -> str:
    name = re.sub(r'[^\w\s]', '', name)  # Remove any characters that aren't word characters
    name = name.lower()  # Convert to lowercase
    name = ' '.join(sorted(name.split()))  # Arrange words in alphabetical order and join
    return name

In [6]:
print(normalize_name("Chicken Breast"))     
print(normalize_name("Spaghetti Bolognese!"))  
print(normalize_name("Eggplant Parmesan."))
print(normalize_name("pork, loin steak"))

breast chicken
bolognese spaghetti
eggplant parmesan
loin pork steak


In [7]:
recipes_df_dummy = recipes_df.copy()

In [8]:
recipes_df_dummy['Normalized Ingredient Name'] = recipes_df_dummy['Ingredient Name'].apply(normalize_name)
recipes_df_dummy.head()

Unnamed: 0,Recipe ID,Recipe Name,Ingredient Name,Ingredient Weight / kg,Normalized Ingredient Name
0,1,Spaghetti Bolognese,beef mince,0.25,beef mince
1,1,Spaghetti Bolognese,Pork Mince,0.25,mince pork
2,1,Spaghetti Bolognese,Carrots,0.1,carrots
3,1,Spaghetti Bolognese,Onions,0.25,onions
4,1,Spaghetti Bolognese,Celery,0.1,celery


# Testing Food Classes Hierarchy

In [9]:
food_classes_df_dummy = food_classes_df.copy()
rows, cols = food_classes_df_dummy.shape
print(f"The DataFrame has {rows} rows and {cols} columns.")


The DataFrame has 71 rows and 4 columns.


In [10]:
food_classes_df_dummy.head()

Unnamed: 0,ID,Name,Impact / kg,Parent ID
0,1,Beef Mince,,2.0
1,2,Beef,2.649402,3.0
2,3,Ruminant Meat,5.508226,4.0
3,4,Meat,1.310414,
4,5,Pork Mince,,6.0


In [11]:
food_classes_df_dummy = food_classes_df_dummy.dropna(subset=['Impact / kg', 'Parent ID'])

In [12]:
food_classes_df_dummy.shape
print(f"The DataFrame has {rows} rows and {cols} columns.")

The DataFrame has 71 rows and 4 columns.


In [13]:
food_classes_df_dummy.head()

Unnamed: 0,ID,Name,Impact / kg,Parent ID
1,2,Beef,2.649402,3.0
2,3,Ruminant Meat,5.508226,4.0
6,7,Pork & Poultry,4.548882,4.0
7,8,Carrots,9.811718,9.0
8,9,Root Vegetables,7.781639,10.0


In [14]:
# Class to represent a Food Class
class FoodClass:
    def __init__(self, id: int, name: str, impact: float, parent_id: int):
        self.id = id
        self.name = name
        self.impact = impact
        self.parent_id = parent_id

# Function to build the food class hierarchy
def build_food_class_hierarchy(df: pd.DataFrame) -> dict:
    food_classes = {}
    for _, row in df.iterrows():
        food_classes[row['ID']] = FoodClass(row['ID'], row['Name'], row['Impact / kg'], row['Parent ID'])
    return food_classes

In [15]:
food_classes = build_food_class_hierarchy(food_classes_df_dummy)
print('\nFood Class Hierarchy:')
for class_id, food_class in food_classes.items():
    print(f'ID: {food_class.id}, Name: {food_class.name}, Impact {food_class.impact}, Parent ID: {food_class.parent_id}')


Food Class Hierarchy:
ID: 2, Name: Beef, Impact 2.649402237, Parent ID: 3.0
ID: 3, Name: Ruminant Meat, Impact 5.508225903, Parent ID: 4.0
ID: 7, Name: Pork & Poultry, Impact 4.548881992, Parent ID: 4.0
ID: 8, Name: Carrots, Impact 9.811717734, Parent ID: 9.0
ID: 9, Name: Root Vegetables, Impact 7.781639031, Parent ID: 10.0
ID: 11, Name: Onions, Impact 9.000471107, Parent ID: 12.0
ID: 12, Name: Onions and Leeks, Impact 4.764500409, Parent ID: 13.0
ID: 13, Name: Bulbs, Impact 7.706728936, Parent ID: 10.0
ID: 14, Name: Celery, Impact 0.2183862344, Parent ID: 15.0
ID: 15, Name: Stem Vegetables, Impact 5.441168674, Parent ID: 10.0
ID: 16, Name: Garlic, Impact 4.436081072, Parent ID: 13.0
ID: 18, Name: Herbs, Impact 2.102187648, Parent ID: 19.0
ID: 19, Name: Leaves, Impact 4.161051102, Parent ID: 10.0
ID: 20, Name: Olive Oil, Impact 8.084466052, Parent ID: 21.0
ID: 23, Name: Tomatoes, Impact 1.048766001, Parent ID: 24.0
ID: 24, Name: Fruit Vegetables, Impact 2.404733034, Parent ID: 10.0
ID

In [16]:
new_food_class_id = 72
new_food_class_name = "Orange"
new_food_class_impact = 0.3  
new_food_class_parent_id = 1 

# Create a new FoodClass instance
new_food_class = FoodClass(new_food_class_id, new_food_class_name, new_food_class_impact, new_food_class_parent_id)

# Add it to the food_classes dictionary
food_classes[new_food_class_id] = new_food_class

# Verify the new class was added
print(f"Added new food class: ID = {new_food_class.id}, Name = {new_food_class.name}, "
      f"Impact = {new_food_class.impact}, Parent ID = {new_food_class.parent_id}")

# Display the updated food_classes dictionary
print("\nUpdated Food Class Hierarchy:")
for class_id, food_class in food_classes.items():
    print(f"ID: {food_class.id}, Name: {food_class.name}, Impact: {food_class.impact}, Parent ID: {food_class.parent_id}")

Added new food class: ID = 72, Name = Orange, Impact = 0.3, Parent ID = 1

Updated Food Class Hierarchy:
ID: 2, Name: Beef, Impact: 2.649402237, Parent ID: 3.0
ID: 3, Name: Ruminant Meat, Impact: 5.508225903, Parent ID: 4.0
ID: 7, Name: Pork & Poultry, Impact: 4.548881992, Parent ID: 4.0
ID: 8, Name: Carrots, Impact: 9.811717734, Parent ID: 9.0
ID: 9, Name: Root Vegetables, Impact: 7.781639031, Parent ID: 10.0
ID: 11, Name: Onions, Impact: 9.000471107, Parent ID: 12.0
ID: 12, Name: Onions and Leeks, Impact: 4.764500409, Parent ID: 13.0
ID: 13, Name: Bulbs, Impact: 7.706728936, Parent ID: 10.0
ID: 14, Name: Celery, Impact: 0.2183862344, Parent ID: 15.0
ID: 15, Name: Stem Vegetables, Impact: 5.441168674, Parent ID: 10.0
ID: 16, Name: Garlic, Impact: 4.436081072, Parent ID: 13.0
ID: 18, Name: Herbs, Impact: 2.102187648, Parent ID: 19.0
ID: 19, Name: Leaves, Impact: 4.161051102, Parent ID: 10.0
ID: 20, Name: Olive Oil, Impact: 8.084466052, Parent ID: 21.0
ID: 23, Name: Tomatoes, Impact: 1.

# Testing Get Impact from Class IDs

In [17]:
def get_impact(food_class_id: int) -> float:
    food_class = food_classes.get(food_class_id)
    if not food_class:
        raise ValueError(f"No food class found for ID {food_class_id}")
    if not pd.isna(food_class.impact):
        return food_class.impact
    if pd.isna(food_class.parent_id):
        raise ValueError(f"No impact available and no parent to recurse for {food_class.name}")
    return get_impact(food_class.parent_id)

In [18]:
test_ids = [2, 43, 32] 

for food_class_id in test_ids:
    try:
        impact = get_impact(food_class_id)
        print(f"Food Class ID {food_class_id}: Impact = {impact} kg CO2")
    except ValueError as e:
        print(f"Food Class ID {food_class_id}: {e}")

Food Class ID 2: Impact = 2.649402237 kg CO2
Food Class ID 43: No food class found for ID 43
Food Class ID 32: Impact = 7.57055912 kg CO2


# Testing the Recipe Calculator

In [19]:
def calculate_recipe_impact(recipe: pd.DataFrame) -> float:
    total_impact = 0
    matched_classes = []
    for _, ingredient in recipe.iterrows():
        ingredient_name = normalize_name(ingredient['Ingredient Name'])
        matched_class = next(
            (fc for fc in food_classes.values() if normalize_name(fc.name) == ingredient_name), None)
        if not matched_class:
            print(f"Ingredient '{ingredient_name}' not found.")
            return None
        # Store the matched class for visualization
        matched_classes.append((ingredient_name, matched_class.name))
        try:
            impact = get_impact(matched_class.id)
            total_impact += impact * ingredient['Ingredient Weight / kg']
        except ValueError as e:
            print(e)
            return None
    matched_df = pd.DataFrame(matched_classes)
    display(matched_df)
    return total_impact

In [20]:
for recipe_id, recipe_data in recipes_df.groupby('Recipe ID'):
    recipe_name = recipe_data['Recipe Name'].iloc[0]  # Get the recipe name
    impact = calculate_recipe_impact(recipe_data)
    if impact is not None:
        print(f"Recipe ID {recipe_id} - {recipe_name}: Total Impact = {impact} kg CO2")

Ingredient 'beef mince' not found.
Ingredient 'caster golden sugar' not found.
Ingredient 'onions red' not found.
Ingredient 'caster sugar' not found.


In [21]:
# Class to represent a Food Class
class FoodClass:
    def __init__(self, id: int, name: str, impact: float, parent_id: int):
        self.id = id
        self.name = name
        self.impact = impact
        self.parent_id = parent_id

# Function to build the food class hierarchy
def build_food_class_hierarchy(df: pd.DataFrame) -> dict:
    food_classes = {}
    for _, row in df.iterrows():
        food_classes[row['ID']] = FoodClass(row['ID'], row['Name'], row['Impact / kg'], row['Parent ID'])
    return food_classes

In [22]:
food_classes_df_dummy_2 = food_classes_df.copy()
food_classes_df_dummy_2 = food_classes_df_dummy_2.dropna()
food_classes_2 = build_food_class_hierarchy(food_classes_df_dummy_2)
print('\nFood Class Hierarchy:')
for class_id, food_class in food_classes_2.items():
    print(f'ID: {food_class.id}, Name: {food_class.name}, Impact {food_class.impact}, Parent ID: {food_class.parent_id}')


Food Class Hierarchy:
ID: 2, Name: Beef, Impact 2.649402237, Parent ID: 3.0
ID: 3, Name: Ruminant Meat, Impact 5.508225903, Parent ID: 4.0
ID: 7, Name: Pork & Poultry, Impact 4.548881992, Parent ID: 4.0
ID: 8, Name: Carrots, Impact 9.811717734, Parent ID: 9.0
ID: 9, Name: Root Vegetables, Impact 7.781639031, Parent ID: 10.0
ID: 11, Name: Onions, Impact 9.000471107, Parent ID: 12.0
ID: 12, Name: Onions and Leeks, Impact 4.764500409, Parent ID: 13.0
ID: 13, Name: Bulbs, Impact 7.706728936, Parent ID: 10.0
ID: 14, Name: Celery, Impact 0.2183862344, Parent ID: 15.0
ID: 15, Name: Stem Vegetables, Impact 5.441168674, Parent ID: 10.0
ID: 16, Name: Garlic, Impact 4.436081072, Parent ID: 13.0
ID: 18, Name: Herbs, Impact 2.102187648, Parent ID: 19.0
ID: 19, Name: Leaves, Impact 4.161051102, Parent ID: 10.0
ID: 20, Name: Olive Oil, Impact 8.084466052, Parent ID: 21.0
ID: 23, Name: Tomatoes, Impact 1.048766001, Parent ID: 24.0
ID: 24, Name: Fruit Vegetables, Impact 2.404733034, Parent ID: 10.0
ID

In [23]:
def calculate_recipe_impact_2(recipe: pd.DataFrame) -> float:
    total_impact = 0
    matched_classes = []
    for _, ingredient in recipe.iterrows():
        ingredient_name = normalize_name(ingredient['Ingredient Name'])
        matched_class = next(
            (fc for fc in food_classes_2.values() if normalize_name(fc.name) == ingredient_name), None)
        if not matched_class:
            print(f"Ingredient '{ingredient_name}' not found.")
            return None
        # Store the matched class for visualization
        matched_classes.append((ingredient_name, matched_class.name))
        try:
            impact = get_impact(matched_class.id)
            total_impact += impact * ingredient['Ingredient Weight / kg']
        except ValueError as e:
            print(e)
            return None
    matched_df = pd.DataFrame(matched_classes)
    display(matched_df)
    return total_impact

In [24]:
for recipe_id, recipe_data in recipes_df_dummy.groupby('Recipe ID'):
    recipe_name = recipe_data['Recipe Name'].iloc[0]  # Get the recipe name
    impact = calculate_recipe_impact_2(recipe_data)
    if impact is not None:
        print(f"Recipe ID {recipe_id} - {recipe_name}: Total Impact = {impact} kg CO2")

Ingredient 'beef mince' not found.
Ingredient 'caster golden sugar' not found.
Ingredient 'onions red' not found.
Ingredient 'caster sugar' not found.


In [25]:
from fuzzywuzzy import process

In [34]:
def calculate_recipe_impact_3(recipe: pd.DataFrame) -> float:
    total_impact = 0
    matched_classes = []
    
    # Extract food class names for fuzzy matching
    food_class_names = [fc.name for fc in food_classes_2.values()]
    
    for _, ingredient in recipe.iterrows():
        ingredient_name = normalize_name(ingredient['Ingredient Name'])
        
        # Use fuzzy matching to find the closest food class name
        closest_match, score = process.extractOne(ingredient_name, food_class_names)
        
        # Define a threshold for the match score
        if score < 80:  # You can adjust this threshold
            print(f"Ingredient '{ingredient_name}' not closely matched to any food class.")
            continue
        
        matched_class = next(fc for fc in food_classes.values() if fc.name == closest_match)
        matched_classes.append({"Ingredient Name": ingredient_name, "Matched Food Class": matched_class.name})
        
        try:
            impact = get_impact(matched_class.id)
            total_impact += impact * ingredient['Ingredient Weight / kg']
        except ValueError as e:
            print(e)
            return None
    
    matched_df = pd.DataFrame(matched_classes)
    print("Matched Ingredient Names and Food Class Names:")
    display(matched_df)
    
    return total_impact


In [35]:
for recipe_id, recipe_data in recipes_df_dummy.groupby('Recipe ID'):
    recipe_name = recipe_data['Recipe Name'].iloc[0]  # Get the recipe name
    impact = calculate_recipe_impact_3(recipe_data)
    if impact is not None:
        print(f"Recipe ID {recipe_id} - {recipe_name}: Total Impact = {impact} kg CO2")

Ingredient 'mince pork' not closely matched to any food class.
Ingredient 'rosemary' not closely matched to any food class.
Ingredient 'spaghetti' not closely matched to any food class.
Matched Ingredient Names and Food Class Names:


Unnamed: 0,Ingredient Name,Matched Food Class
0,beef mince,Beef
1,carrots,Carrots
2,onions,Onions
3,celery,Celery
4,garlic,Garlic
5,oil olive,Olive Oil
6,plum tinned tomatoes,Tomatoes
7,water,Water
8,red wine,Wine


Recipe ID 1 - Spaghetti Bolognese: Total Impact = 9.063161232359999 kg CO2
Ingredient 'fingers lady' not closely matched to any food class.
Ingredient 'cocoa powder' not closely matched to any food class.
Matched Ingredient Names and Food Class Names:


Unnamed: 0,Ingredient Name,Matched Food Class
0,cream double,Double Cream
1,mascarpone,Mascarpone
2,marsala,Marsala
3,caster golden sugar,Sugar
4,coffee granules,Coffee
5,water,Water
6,chocolate dark,Dark Chocolate


Recipe ID 2 - Tiramisu: Total Impact = 4.428267687335 kg CO2
Ingredient 'harissa' not closely matched to any food class.
Matched Ingredient Names and Food Class Names:


Unnamed: 0,Ingredient Name,Matched Food Class
0,chickpeas,Chickpeas
1,coriander,Coriander
2,parsley,Parsley
3,onions red,Onions
4,tomatoes,Tomatoes
5,oil olive,Olive Oil
6,juice lemon,Lemons


Recipe ID 3 - Chickpea Salad: Total Impact = 11.169223540593 kg CO2
Ingredient 'gum xanthum' not closely matched to any food class.
Ingredient 'milk powder' not closely matched to any food class.
Matched Ingredient Names and Food Class Names:


Unnamed: 0,Ingredient Name,Matched Food Class
0,cream double,Double Cream
1,caster sugar,Sugar


Recipe ID 4 - Salted Caramel Ice Cream: Total Impact = 0.8426292078000001 kg CO2


In [28]:
food_classes_df_dummy_3 = food_classes_df.copy()
food_classes_3 = build_food_class_hierarchy(food_classes_df_dummy_3)
print('\nFood Class Hierarchy:')
for class_id, food_class in food_classes_3.items():
    print(f'ID: {food_class.id}, Name: {food_class.name}, Impact {food_class.impact}, Parent ID: {food_class.parent_id}')


Food Class Hierarchy:
ID: 1, Name: Beef Mince, Impact nan, Parent ID: 2.0
ID: 2, Name: Beef, Impact 2.649402237, Parent ID: 3.0
ID: 3, Name: Ruminant Meat, Impact 5.508225903, Parent ID: 4.0
ID: 4, Name: Meat, Impact 1.310413655, Parent ID: nan
ID: 5, Name: Pork Mince, Impact nan, Parent ID: 6.0
ID: 6, Name: Pork, Impact nan, Parent ID: 7.0
ID: 7, Name: Pork & Poultry, Impact 4.548881992, Parent ID: 4.0
ID: 8, Name: Carrots, Impact 9.811717734, Parent ID: 9.0
ID: 9, Name: Root Vegetables, Impact 7.781639031, Parent ID: 10.0
ID: 10, Name: Vegetables, Impact 3.161091821, Parent ID: nan
ID: 11, Name: Onions, Impact 9.000471107, Parent ID: 12.0
ID: 12, Name: Onions and Leeks, Impact 4.764500409, Parent ID: 13.0
ID: 13, Name: Bulbs, Impact 7.706728936, Parent ID: 10.0
ID: 14, Name: Celery, Impact 0.2183862344, Parent ID: 15.0
ID: 15, Name: Stem Vegetables, Impact 5.441168674, Parent ID: 10.0
ID: 16, Name: Garlic, Impact 4.436081072, Parent ID: 13.0
ID: 17, Name: Rosemary, Impact nan, Paren

In [29]:
def calculate_recipe_impact_4(recipe: pd.DataFrame) -> float:
    total_impact = 0
    for _, ingredient in recipe.iterrows():
        ingredient_name = normalize_name(ingredient['Ingredient Name'])
        matched_class = next(
            (fc for fc in food_classes_3.values() if normalize_name(fc.name) == ingredient_name), None)
        if not matched_class:
            print(f"Ingredient '{ingredient_name}' not found.")
            return None
        try:
            impact = get_impact(matched_class.id)
            total_impact += impact * ingredient['Ingredient Weight / kg']
        except ValueError as e:
            print(e)
            return None
    return total_impact

In [31]:
for recipe_id, recipe_data in recipes_df_dummy.groupby('Recipe ID'):
    recipe_name = recipe_data['Recipe Name'].iloc[0]  # Get the recipe name
    impact = calculate_recipe_impact_4(recipe_data)
    if impact is not None:
        print(f"Recipe ID {recipe_id} - {recipe_name}: Total Impact = {impact} kg CO2")

Recipe ID 1 - Spaghetti Bolognese: Total Impact = 13.989865665656001 kg CO2
Recipe ID 2 - Tiramisu: Total Impact = 4.8948132164 kg CO2
Recipe ID 3 - Chickpea Salad: Total Impact = 11.188634844578 kg CO2
Ingredient 'gum xanthum' not found.


# Final Test

In [30]:
# Build the food class hierarchy dictionary
food_classes = build_food_class_hierarchy(food_classes_df)

# Process each recipe
for recipe_id, recipe_data in recipes_df.groupby('Recipe ID'):
    recipe_name = recipe_data['Recipe Name'].iloc[0]  # Get the recipe name
    impact = calculate_recipe_impact(recipe_data)
    if impact is not None:
        print(f"Recipe ID {recipe_id} - {recipe_name}: Total Impact = {impact} kg CO2")

Unnamed: 0,0,1
0,beef mince,Beef Mince
1,mince pork,Pork Mince
2,carrots,Carrots
3,onions,Onions
4,celery,Celery
5,garlic,Garlic
6,rosemary,Rosemary
7,oil olive,Olive Oil
8,plum tinned tomatoes,Tinned Plum Tomatoes
9,water,Water


Recipe ID 1 - Spaghetti Bolognese: Total Impact = 13.989865665656001 kg CO2


Unnamed: 0,0,1
0,cream double,Double Cream
1,mascarpone,Mascarpone
2,marsala,Marsala
3,caster golden sugar,Golden Caster Sugar
4,coffee granules,Coffee Granules
5,water,Water
6,fingers lady,Lady Fingers
7,chocolate dark,Dark Chocolate
8,cocoa powder,Cocoa Powder


Recipe ID 2 - Tiramisu: Total Impact = 4.8948132164 kg CO2


Unnamed: 0,0,1
0,chickpeas,Chickpeas
1,coriander,Coriander
2,parsley,Parsley
3,onions red,Red Onions
4,tomatoes,Tomatoes
5,oil olive,Olive Oil
6,juice lemon,Lemon Juice
7,harissa,Harissa


Recipe ID 3 - Chickpea Salad: Total Impact = 11.188634844578 kg CO2
Ingredient 'gum xanthum' not found.
