In [13]:
import pandas as pd
import json
import re
from fractions import Fraction
import typing
import os
import ast
from googletrans import Translator, constants

# Absolute path of the recipe dataset CSV that is loaded below.
filename = '/Users/Ian van de Wetering/Documents/TUe/Knowledge Engineering 2AMD20/dataset/full_dataset.csv'

df_full = pd.read_csv(filename)

# No need to check for and remove null values: the (now commented-out) check
# below reportedly showed that the dataset contains none.
# null_values = df_full.isnull().sum()
# print(null_values)

# Drop the columns that the rest of this cell does not use.
columns_to_remove = ['directions', 'link', 'source']
df_full = df_full.drop(columns_to_remove, axis=1)

# Lift pandas' display limits so rows, columns and cell contents are shown
# without truncation.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Work on the first 100 rows only.
df: pd.DataFrame = pd.DataFrame(df_full[:100])
#print(df)

import typing
# Maps a (lower-case) American unit abbreviation to the replacement text that
# replace_units() substitutes for it. Entries are applied in insertion order,
# so 'oz.' is handled before the dot-less ' oz' fallback.
# NOTE(review): 'oz.' / ' oz' are converted to millilitres, i.e. treated as
# fluid ounces even when the recipe means weight -- confirm this is intended.
unitConversions: dict = {
    'c.': '236.588 ml',
    'tsp.': '4.92892 ml',
    'tbsp.': '14.7868 ml',
    'oz.': '29.5735 ml',
    'pt.': '473.176 ml',
    'qt.': '946.353 ml',
    'lb.': '453.592 grams',
    'gal.': '3785.41 ml',
    ' oz': ' 29.5735 ml',
}


def _replace_unit(text: str, unit: str, replacement: str) -> str:
    """Replace stand-alone occurrences of `unit` in `text` by `replacement`."""
    starts_with_letter = unit[:1].isalpha()
    # A unit must not be the tail (or head) of a longer word: without these
    # guards "doz." became "d29.5735 ml" and "etc." became "et236.588 ml".
    pattern = (
        (r'(?<![^\W\d_])' if starts_with_letter else '')
        + re.escape(unit)
        + (r'(?![^\W\d_])' if unit[-1:].isalpha() else '')
    )

    def substitute(match):
        # "1c." has no gap between amount and unit; insert one so the amount
        # does not fuse with the converted number ("1236.588 ml").
        start = match.start()
        glued_to_amount = (
            starts_with_letter
            and start > 0
            and match.string[start - 1].isdigit()
        )
        return ' ' + replacement if glued_to_amount else replacement

    # A function replacement keeps the replacement text literal (no template
    # escapes are interpreted).
    return re.sub(pattern, substitute, text)


def replace_units(text: str) -> str:
    """Lower-case `text` and replace American unit abbreviations.

    Every key of `unitConversions` that occurs as a stand-alone abbreviation
    is replaced by its mapped value, in the dictionary's insertion order.

    Args:
        text: Ingredient text containing abbreviations such as "c." or "tsp.".

    Returns:
        The lower-cased text with the abbreviations substituted.
    """
    text = text.lower()
    for unit, replacement in unitConversions.items():
        text = _replace_unit(text, unit, replacement)
    return text
def replace_fractions(text: str) -> str:
    """Lower-case `text` and rewrite fractions as decimal numbers.

    Handles plain fractions ("3/4") and mixed numbers whose whole part is
    separated from the fraction by a hyphen or whitespace ("1 1/2", "2-1/2").

    Args:
        text: Ingredient text that may contain fractions.

    Returns:
        The lower-cased text with each fraction replaced by its decimal form;
        fractions with a zero denominator are left as written.
    """
    text = text.lower()
    return re.sub(r'(?:(\d+)[-\s])?(\d+/\d+)', frac2string, text)


def frac2string(s):
    """Convert one regex match from replace_fractions into a decimal string.

    Args:
        s: Match object whose group 1 is the optional whole part and whose
           group 2 is the "numerator/denominator" text.

    Returns:
        The decimal value as a string, or the matched text unchanged when the
        denominator is zero.
    """
    # groups(0) substitutes 0 for a missing whole part.
    whole, fraction_text = s.groups(0)
    try:
        fraction = Fraction(fraction_text)
    except ZeroDivisionError:
        # "1/0" is not a quantity; keep the text instead of aborting the
        # whole column transformation.
        return s.group(0)
    return str(int(whole) + float(fraction))
# Function to simplify ingredients by only keeping the last word
def keep_last_word(item):
    """Reduce every entry of a stringified list to its last word.

    Args:
        item: String form of a list of strings, e.g. '["brown sugar", "milk"]'.

    Returns:
        The string form of the list of last words, e.g. "['sugar', 'milk']".
        Blank entries are kept as empty strings so positions stay aligned.

    Raises:
        ValueError, SyntaxError: If `item` is not a valid Python literal.
    """
    # literal_eval only accepts literals, so text read from the dataset can
    # no longer execute arbitrary code the way eval() allowed.
    last_words = []
    for entry in ast.literal_eval(item):
        parts = entry.split()
        last_words.append(parts[-1] if parts else '')
    return str(last_words)
# Function to replace list items based on keyword list
def _parse_list_string(lst):
    """Turn the string form of a list into a list of strings."""
    # The NER column holds JSON lists; Python literals are accepted as well.
    for parser in (json.loads, ast.literal_eval):
        try:
            parsed = parser(lst)
        except (ValueError, SyntaxError, TypeError):
            continue
        if isinstance(parsed, (list, tuple)):
            return [str(entry) for entry in parsed]
    # Last resort for malformed input: the original split-and-trim parsing,
    # made safe for empty pieces.
    entries = []
    for piece in lst.split(', '):
        piece = piece[1:-1]
        if piece.startswith('"'):
            piece = piece[1:]
        if piece.endswith('"'):
            piece = piece[:-1]
        entries.append(piece)
    return entries


def replace_list_items(lst, keyword_list=None):
    """Map every entry of a stringified list onto a known keyword.

    Each entry is replaced by the first keyword (in list order) that occurs
    in it as a substring; entries without a match are kept unchanged.

    NOTE(review): substring matching also hits inside unrelated words (the
    cell output shows "graham cracker crumbs" -> "rum" and "pineapple" ->
    "apple"); the matching rule itself is left as it was.

    Args:
        lst: String form of a list of strings, e.g. '["nuts", "milk"]'.
        keyword_list: Keywords to map onto. Defaults to the module-level
            `keywords` list.

    Returns:
        The string form (``str(list)``) of the mapped entries.
    """
    if keyword_list is None:
        keyword_list = keywords
    result = [
        next((keyword for keyword in keyword_list if keyword in item), item)
        for item in _parse_list_string(lst)
    ]
    return str(result)

def convert_and_multiply_units(text: str) -> list:
    """Collapse runs of adjacent numbers in each ingredient into their product.

    `text` is a JSON list of ingredient strings. Each string is split on
    whitespace and parentheses; tokens that parse as floats are multiplied
    into the directly preceding number, all other tokens are kept as they
    are. E.g. "1 0.5 0.24 ml" becomes "0.12 ml".

    Returns:
        One rewritten string per ingredient, tokens joined by single spaces.
    """
    rewritten = []
    for raw_ingredient in json.loads(text):
        tokens: list = []
        # Splitting leaves empty strings next to parentheses; skip those.
        for piece in filter(None, re.split(r'\s+|\)|\(', raw_ingredient)):
            try:
                number = float(piece)
            except ValueError:
                # Not a number: keep the word untouched.
                tokens.append(piece)
                continue
            if tokens and isinstance(tokens[-1], float):
                # Fold this number into the one right before it.
                tokens[-1] = float(tokens[-1] * number)
            else:
                tokens.append(number)
        rewritten.append(" ".join(map(str, tokens)))
    return rewritten


# Normalise the ingredient text in three passes: unit abbreviations first,
# then fractions, then parsing the list and multiplying adjacent numbers.
df['ingredients'] = (
    df['ingredients']
    .apply(replace_units)
    .apply(replace_fractions)
    .apply(convert_and_multiply_units)
)
# Disabled: keep_last_word is too simple an algorithm; a mapping function
# backed by an extra database is used instead.
#df['NER_simple'] = df['NER'].apply(keep_last_word)

# From here on the frame is referred to as recipe_df (same object as df).
recipe_df = df

# helper functions
def translator(output_path='ah_products_en.csv', write=True) -> pd.DataFrame:
    """Return the supermarket products with an English name column 'l_en'.

    When `output_path` already exists it is read back and returned without
    translating anything. Otherwise 'supermarket.json' is loaded, every value
    of its 'n' column is translated from "nl" to "en" with googletrans, and
    the translations are stored in a new 'l_en' column.

    Args:
        output_path: CSV file used as cache for the translated products.
        write: Whether freshly translated products are written to
            `output_path`.

    Returns:
        The products frame, either read from the cache or freshly translated.
    """
    # A previous run already produced the translations: reuse them.
    if os.path.isfile(output_path):
        print(f'File {output_path} already exists, skipping translation')
        return pd.read_csv(output_path)

    raw_catalogue = pd.read_json('supermarket.json', encoding='UTF-8')
    ah_products = pd.DataFrame(raw_catalogue.iloc[0]['d'])

    nl_to_en = Translator()
    ah_products['l_en'] = [
        nl_to_en.translate(name, src="nl", dest="en").text
        for name in ah_products['n']
    ]

    if not write:
        print("Done translating inplace")
        return ah_products

    ah_products.to_csv(output_path)
    print(f'Wrote translated products to {output_path}')
    return ah_products

def product_price_quantity_returner_per_ingredient(product: str, supermarket_df: pd.DataFrame) -> tuple:
    """Look up the closest supermarket product for an ingredient name.

    Candidates are the rows whose 'l_en' value contains `product` as a
    literal, case-sensitive substring; the candidate with the shortest
    'l_en' value wins.

    Args:
        product: Ingredient name to search for.
        supermarket_df: Products frame with the columns 'l_en', 'p' and 's'.

    Returns:
        A tuple of the winning row's ('l_en', 'p', 's') values, or
        ('NaN', 'NaN', 'NaN') when no row matches (a message is printed
        in that case).
    """
    # regex=False: ingredient names are plain text, so characters such as
    # '+' or '(' must not be interpreted as a pattern (they used to raise or
    # mis-match). na=False: rows without a name simply do not match instead
    # of breaking the boolean mask.
    is_candidate = supermarket_df['l_en'].str.contains(str(product), regex=False, na=False)
    candidate_products = supermarket_df[is_candidate]
    if candidate_products.empty:
        print(f'No products found for {product}')
        return 'NaN', 'NaN', 'NaN'
    # Index label of the candidate with the shortest English name.
    min_length_idx = candidate_products['l_en'].str.len().idxmin()
    return (
        candidate_products.loc[min_length_idx, 'l_en'],
        candidate_products.loc[min_length_idx, 'p'],
        candidate_products.loc[min_length_idx, 's'],
    )

def mass_recipe_converter(text: str, products_df: pd.DataFrame) -> tuple:
    """Find a supermarket product, price and quantity for every ingredient.

    Args:
        text: String form of a list of simplified ingredient names, e.g.
            "['butter', 'milk']".
        products_df: Products frame handed on to
            product_price_quantity_returner_per_ingredient.

    Returns:
        A tuple (products, prices, quantities) of three lists, each with one
        entry per ingredient in the order the ingredients appear in `text`.

    Raises:
        ValueError, SyntaxError: If `text` is not a valid Python literal.
    """
    # Every '+' is replaced before parsing. literal_eval itself copes with
    # '+' inside strings; per the original note this is "a regex thing",
    # presumably because the ingredient name is later used as a search
    # pattern in the product lookup.
    text = text.replace('+', 'NaN')
    simple_ingredients: list = ast.literal_eval(text)
    prices = []
    quantities = []
    products = []
    # For each simple ingredient, look up the product, price and quantity.
    for ingredient in simple_ingredients:
        product, price, quantity = product_price_quantity_returner_per_ingredient(ingredient, products_df)
        prices.append(price)
        quantities.append(quantity)
        products.append(product)
    return products, prices, quantities

# Read the pkl file from https://www.kaggle.com/datasets/shuyangli94/food-com-recipes-and-user-interactions?select=ingr_map.pkl
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# when it comes from a trusted source.
unpickled_df = pd.read_pickle("ingr_map.pkl")

# Create a list of the unique values in the 'replaced' column of ingr_map.pkl
keywords = unpickled_df['replaced'].unique().tolist()
# Clean a few obvious items that would disturb the mapping:
# strip trailing apostrophes and drop keywords that are a single character.
keywords = [item.rstrip("'") for item in keywords]
keywords = [item for item in keywords if len(item) != 1]
#append_list = ["water", "sugar"]
#for item in append_list:
    #keywords.append(item)
# Map every NER entry onto a keyword (replace_list_items reads `keywords`).
recipe_df['NER_mapped'] = recipe_df['NER'].apply(replace_list_items)

# Load (or create) the translated product list, then look up a product,
# price and quantity for every mapped ingredient of every recipe.
translated_products = translator()
triplet_NER = recipe_df['NER_mapped'].apply(lambda text: mass_recipe_converter(text, translated_products))
# Each element of triplet_NER is a (products, prices, quantities) tuple;
# zip(*...) regroups them into one sequence per new column.
recipe_df['NER_product'], recipe_df['NER_price'], recipe_df['NER_quantity'] = zip(*triplet_NER)

# Last expression of the cell: show the first 100 rows.
recipe_df.head(100)


File ah_products_en.csv already exists, skipping translation
No products found for brown sugar
No products found for cream of mushroom soup
No products found for garlic powder
No products found for chicken gravy
No products found for cream of mushroom soup
No products found for baking potato
No products found for Worcestershire sauce
No products found for condensed milk
No products found for graham cracker crust
No products found for dark sweet pitted cherries
No products found for ginger ale
No products found for boiling water
No products found for almond extract
No products found for paraffin
No products found for pie filling
No products found for condensed milk
No products found for cleaned strawberries
No products found for white cake
No products found for frozen strawberries
No products found for boiling water
No products found for shortening
No products found for shortening
No products found for Frango
No products found for tomato paste
No products found for ground black pepper
N

No products found for frozen strawberries
No products found for black coffee
No products found for tomato paste
No products found for parsley flake
No products found for frozen limas
No products found for green bell pepper
No products found for salad oil
No products found for shortening
No products found for floured blueberries
No products found for cream of chicken soup
No products found for stuffing
No products found for shortening
No products found for flaked coconut
No products found for brown sugar
No products found for Muenster
No products found for instant tea
No products found for Marshmallow Fluff
No products found for red gelatin
No products found for Bisquick
No products found for marjoram
No products found for chicken broth
No products found for lean pork
No products found for bamboo shoot
No products found for water chestnut
No products found for frozen pea pod
No products found for fresh strawberries
No products found for frozen raspberries
No products found for graham cr

No products found for paraffin
No products found for ground pork
No products found for crabmeat
No products found for garlic powder
No products found for Worcestershire sauce
No products found for celery powder
No products found for liquid smoke
No products found for Worcestershire sauce
No products found for fresh mushroom
No products found for egg white
No products found for chicken broth
No products found for liquid smoke
No products found for brown sugar
No products found for Tater
No products found for chicken broth
No products found for cooking oil
No products found for sweet relish
No products found for garlic powder
No products found for light brown sugar
No products found for white grape juice
No products found for cooking spray
No products found for meat dripping
No products found for egg white
No products found for graham cracker
No products found for maraschino cherries
No products found for frozen strawberries
No products found for boiling water
No products found for cresc

No products found for brown gravy mix
No products found for jello
No products found for boiling water
No products found for vanilla pudding
No products found for almond extract
No products found for shortening
No products found for brown sugar
No products found for medium shells
No products found for frozen mixed vegetable
No products found for catsup
No products found for brown sugar
No products found for Worcestershire sauce
No products found for jello
No products found for shallot
No products found for fettucini
No products found for brown sugar
No products found for cilantro
No products found for unbaked pie crust
No products found for persimmon pulp
No products found for Crisco
No products found for crawfish tail
No products found for jalapeno pepper
No products found for cream of mushroom soup
No products found for persimmon
No products found for graham cracker
No products found for maraschino cherries
No products found for brown sugar
No products found for pie shell
No products 

No products found for Season-All
No products found for garlic powder
No products found for Triple Sec
No products found for vodka
No products found for Meyers
No products found for cream of chicken soup
No products found for cream of onion soup
No products found for cream of mushroom soup
No products found for sugar cookie
No products found for egg white
No products found for pimento
No products found for recipe Basic Sweet Dough
No products found for oregano flake
No products found for cooking oil
No products found for crushed strawberries
No products found for cilantro
No products found for beef broth
No products found for white potato
No products found for parsley flake
No products found for frozen broccoli
No products found for cream of mushroom soup
No products found for cream of celery soup
No products found for pimento
No products found for fine spaghetti
No products found for cooking oil
No products found for tuna fish
No products found for cream of mushroom soup
No products fo

Unnamed: 0.1,Unnamed: 0,title,ingredients,NER,NER_mapped,NER_product,NER_price,NER_quantity
0,0,No-Bake Nut Cookies,"[236.588 ml firmly packed brown sugar, 118.294 ml evaporated milk, 2.46446 ml vanilla, 118.294 ml broken nuts pecans, 29.5736 ml butter or margarine, 828.058 ml bite size shredded rice biscuits]","[""brown sugar"", ""milk"", ""vanilla"", ""nuts"", ""butter"", ""bite size shredded rice biscuits""]","['brown sugar', 'milk', 'vanilla', 'nut', 'butter', 'rice']","[NaN, AH Buttermilk, AH Muffin vanilla, AH Coconut, AH Herb butter, AH Sushi rice]","[NaN, 0.85, 1.66, 2.0, 1.25, 2.49]","[NaN, 0,5 l, 300 g, 90 g, 100 g, 500 g]"
1,1,Jewell Ball'S Chicken,"[1.0 small jar chipped beef, cut up, 4.0 boned chicken breasts, 1.0 can cream of mushroom soup, 1.0 carton sour cream]","[""beef"", ""chicken breasts"", ""cream of mushroom soup"", ""sour cream""]","['beef', 'chicken breast', 'cream of mushroom soup', 'sour cream']","[AH roast beef, Encore chicken breast, NaN, AH Oat sour cream]","[2.99, 1.29, NaN, 0.95]","[100 g, 70 g, NaN, 120 g]"
2,2,Creamy Corn,"[946.352 ml pkg. frozen corn, 236.588 ml pkg. cream cheese, cubed, 78.86266666666666 ml butter, cubed, 2.46446 ml garlic powder, 2.46446 ml salt, 1.23223 ml pepper]","[""frozen corn"", ""cream cheese"", ""butter"", ""garlic powder"", ""salt"", ""pepper""]","['corn', 'cream cheese', 'butter', 'garlic powder', 'salt', 'pepper']","[AH Popcorn Salt, AH Soft herb cream cheese, AH Herb butter, NaN, AH Peas 0% salt, AH Red pepper]","[1.05, 0.85, 1.25, NaN, 1.69, 0.44]","[100 g, 125 g, 100 g, NaN, 680 g, per stuk]"
3,3,Chicken Funny,"[1.0 large whole chicken, 621.0435 ml cans chicken gravy, 310.52175 ml can cream of mushroom soup, 177.441 ml box stove top stuffing, 118.294 ml shredded cheese]","[""chicken"", ""chicken gravy"", ""cream of mushroom soup"", ""shredded cheese""]","['chicken', 'chicken gravy', 'cream of mushroom soup', 'cheese']","[AH Bapao chicken, NaN, NaN, AH Liver cheese]","[0.89, NaN, NaN, 1.15]","[2 stuks, NaN, NaN, 150 g]"
4,4,Reeses Cups(Candy),"[236.588 ml peanut butter, 177.441 ml graham cracker crumbs, 236.588 ml melted butter, 453.592 grams 828.058 ml powdered sugar, 1.0 large pkg. chocolate chips]","[""peanut butter"", ""graham cracker crumbs"", ""butter"", ""powdered sugar"", ""chocolate chips""]","['peanut butter', 'rum', 'butter', 'powdered sugar', 'chocolate']","[AH Creamy peanut butter, Drum Blue, AH Herb butter, AH Egg waffles with powdered sugar, AH Hot chocolate]","[2.39, 14.3, 1.25, 1.19, 2.39]","[350 g, 40 g, 100 g, 6 stuks, 8 stuks]"
5,5,Cheeseburger Potato Soup,"[6.0 baking potatoes, 453.592 grams of extra lean ground beef, 157.7253333333333 ml butter or margarine, 1419.528 ml milk, 3.69669 ml salt, 2.46446 ml pepper, 1.5 c 177.441 ml shredded cheddar cheese, divided, 12.0 sliced bacon, cooked, crumbled and divided, 4.0 green onion, chopped and divided, 236.588 ml carton sour cream optional]","[""baking potatoes"", ""extra lean ground beef"", ""butter"", ""milk"", ""salt"", ""pepper"", ""Cheddar cheese"", ""bacon"", ""green onion"", ""sour cream""]","['baking potato', 'ground beef', 'butter', 'milk', 'salt', 'pepper', 'cheese', 'bacon', 'onion', 'sour cream']","[NaN, AH Lean ground beef, AH Herb butter, AH Buttermilk, AH Peas 0% salt, AH Red pepper, AH Liver cheese, AH Party bacon, AH Red onions, AH Oat sour cream]","[NaN, 3.69, 1.25, 0.85, 1.69, 0.44, 1.15, 1.49, 0.99, 0.95]","[NaN, 300 g, 100 g, 0,5 l, 680 g, per stuk, 150 g, 400 g, 3 stuks, 120 g]"
6,6,Rhubarb Coffee Cake,"[354.882 ml sugar, 118.294 ml butter, 1.0 egg, 236.588 ml buttermilk, 473.176 ml flour, 2.46446 ml salt, 4.92892 ml soda, 236.588 ml buttermilk, 473.176 ml rhubarb, finely cut, 4.92892 ml vanilla]","[""sugar"", ""butter"", ""egg"", ""buttermilk"", ""flour"", ""salt"", ""soda"", ""buttermilk"", ""rhubarb"", ""vanilla""]","['sugar', 'butter', 'egg', 'milk', 'flour', 'salt', 'soda', 'milk', 'bar', 'vanilla']","[AH Cane sugar, AH Herb butter, AH Lasagna egg, AH Buttermilk, AH Wheat flour, AH Peas 0% salt, Triangle Liquid soda, AH Buttermilk, AH Rhubarb, AH Muffin vanilla]","[1.89, 1.25, 1.49, 0.85, 0.99, 1.69, 2.39, 0.85, 2.99, 1.66]","[500 g, 100 g, 250 g, 0,5 l, 1 kg, 680 g, 0,73 l, 0,5 l, per bos, 300 g]"
7,7,Scalloped Corn,"[1.0 can cream-style corn, 1.0 can whole kernel corn, 0.5 pkg. approximately 20.0 saltine crackers, crushed, 1.0 egg, beaten, 29.57352 ml butter, divided, pepper to taste]","[""cream-style corn"", ""whole kernel corn"", ""crackers"", ""egg"", ""butter"", ""pepper""]","['cream', 'corn', 'cracker', 'egg', 'butter', 'pepper']","[AH Ice creams, AH Popcorn Salt, AH Nutcracker, AH Lasagna egg, AH Herb butter, AH Red pepper]","[2.99, 1.05, 3.99, 1.49, 1.25, 0.44]","[8 stuks, 100 g, per stuk, 250 g, 100 g, per stuk]"
8,8,Nolan'S Pepper Steak,"[680.3879999999999 grams round steak 1-inch thick , cut into strips, 1.0 can drained tomatoes, cut up save liquid, 414.029 ml water, 118.294 ml onions, 22.1802 ml worcestershire sauce, 2.0 green peppers, diced, 59.147 ml oil]","[""tomatoes"", ""water"", ""onions"", ""Worcestershire sauce"", ""green peppers"", ""oil""]","['tomato', 'water', 'onion', 'Worcestershire sauce', 'green pepper', 'oil']","[AH Roma tomatoes, AH Coconut water, AH Red onions, NaN, Tabasco Mild green pepper sauce, Frying oil]","[2.19, 1.95, 0.99, NaN, 2.99, 7.79]","[750 g, 1 l, 3 stuks, NaN, 60 ml, 2 l]"
9,9,Millionaire Pie,"[1.0 large container cool whip, 1.0 large can crushed pineapple, 1.0 can condensed milk, 3.0 lemons, 236.588 ml pecans, 2.0 graham cracker crusts]","[""pineapple"", ""condensed milk"", ""lemons"", ""pecans"", ""graham cracker crusts""]","['apple', 'condensed milk', 'lemon', 'pecan', 'graham cracker crust']","[AH Pineapple, NaN, AH Bitter lemon, AH Unroasted pecans, NaN]","[2.0, NaN, 0.6900000000000001, 3.99, NaN]","[150 g, NaN, 1 l, 200 g, NaN]"


In [14]:
# Print the cleaned keyword list that replace_list_items matches against.
print(keywords)




['lettuce', 'french vanilla pudding and pie filling mix', 'stove top stuffing mix', 'cream cheese', 'cheddar', 'radicchio', 'pasta sauce', 'tomato sauce', 'shredded three cheese', 'mozzarella', 'cake mix', 'savory herb', 'diced tomato', 'and white chocolate swirled chocolate morsel', 'butter substitute', 'refrigerated cooked beef roast', 'refrigerated white chocolate chip macadamia nut cookie dough', 'crescent recipe creations refrigerated flaky dough sheet', 'perdue short cuts roasted carved chicken breast', 'condensed tomato soup', 'oats and honey crunchy granola bar', 'instant banana cream pudding mix', 'chocolate instant pudding and pie mix', 'sour cream and chive mashed potato', 'red wine', 'dressing', 'condensed cream of celery soup', 'cheesecake instant pudding and pie filling mix', 'olive oil', 'chocolate instant pudding and pie filling mix', 'feta cheese', 'frozen broccoli green beans onions and peppers mix', 'vanilla instant pudding and pie filling mix', 'crushed tomato', 'in