# McCane & Widdowson Product Reduction

Reading in Librarys 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import re #regular expressions 
from fuzzywuzzy import process, fuzz
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import os 
import glob

Reduced Food List

                                                   Decisions Made

- Focuses on raw ingredients like fruits, vegetables, meats, dairy products, grains, etc. These make up a large part of grocery store sales.

- Includes commonly purchased prepared/packaged foods like bread, cereal, cheese, canned goods, jars, frozen foods, snacks, sweets, spreads, oils, spices, etc. These are staples people buy at supermarkets.

- Contains different variations of the same base ingredient (e.g. yogurt plain/fruit/Greek, milk whole/semi-skimmed/skimmed). Retailers sell multiple varieties.

- Excludes less common items or items typically made at home rather than store-bought.

- Limits the variations of each item to 2-3 options. Retail shelves cannot fit every possible variety.

- Covers foods from all the major categories - produce, dairy, meats, seafood, pantry items, frozen foods, snacks, etc.

In [2]:
food_list = [
  'Ackee, canned',
  'Agar, dried', 
  'Almonds, whole',
  'Amaranth leaves, raw',
  'Anchovies, canned',
  'Apple juice, ambient',
  'Apples, cooking, raw',
  'Apples, eating, raw',
  'Apricots, raw',
  'Arrowroot',
  'Artichoke, raw',
  'Asparagus, raw',
  'Aubergine, raw',   
  'Avocado, raw',
  'Bacon, back rashers, raw',
  'Bagels, plain',
  'Baked beans, canned',
  'Baking powder',
  'Bananas, raw',
  'Barley, pearl, raw',
  'Basil, dried',
  'Beans, butter, canned',
  'Beef, braising steak, raw',
  'Beef, fillet steak, raw',
  'Beetroot, raw',
  'Biscuits, digestives',
  'Biscuits, shortbread',
  'Blackberries, raw',   
  'Blackcurrants, raw',
  'Bread, naan',
  'Bread, pitta',
  'Bread, soda',
  'Bread, tortilla, wheat',
  'Bread, white',
  'Bread, wholemeal',
  'Broccoli, raw',
  'Brussels sprouts, raw',
  'Bubble and squeak, homemade',
  'Bulgur wheat, raw',
  'Butter',
  'Butter, spreadable',
  'Cabbage, raw',
  'Cake, madeira', 
  'Cake, fruit',
  'Cake, chocolate fudge',
  'Canned fish, tuna',
  'Canned fish, sardines',
  'Canned meat, corned beef',
  'Canned vegetables, sweetcorn',
  'Canned vegetables, carrots', 
  'Canned vegetables, peas',
  'Carrots, raw',
  'Cauliflower, raw',
  'Celery, raw',
  'Cereal, cornflakes',
  'Cereal, muesli',
  'Cereal, porridge oats',
  'Cheddar cheese',
  'Cheese, brie',
  'Cheese, feta',
  'Cheese, mozzarella',
  'Cheese, parmesan',
  'Cherries, raw',
  'Chicken, roast',
  'Chicken, stir-fry strips',
  'Chickpeas, canned',
  'Chocolate, milk',
  'Chocolate, dark',
  'Chocolate, white',
  'Coconut, desiccated',
  'Coconut milk',
  'Coffee, instant',
  'Cola drink',
  'Cookies, chocolate chip',
  'Cookies, shortbread',
  'Courgettes, raw',
  'Couscous, plain',
  'Crackers, wholemeal',
  'Crisps, potato',
  'Cucumber, raw',
  'Custard, canned',
  'Dates, ready to eat',
  'Drinking chocolate powder',
  'Dried fruit, raisins',
  'Dried fruit, prunes',
  'Dried fruit, apricots',
  'Duck, roast',
  'Edam cheese',
  'Eggs',
  'Fajita kits',
  'Falafel mix',
  'Figs, ready to eat',
  'Fish fingers, cod',
  'Flour, plain',
  'Flour, self-raising',
  'Fresh fish, salmon',
  'Fresh fish, cod',
  'Fresh fish, haddock',
  'Fresh meat, beef',
  'Fresh meat, lamb',
  'Fresh meat, pork',
  'Fresh meat, chicken',
  'Fruit juices',
  'Fruit squash',
  'Fruits, satsumas',
  'Fruits, plums',
  'Garam masala',
  'Gherkins, pickled',
  'Ginger, ground',
  'Grapes, seedless',
  'Green beans, raw',
  'Herbs, mixed dried',
  'Herbs, fresh basil',
  'Herbs, fresh parsley',
  'Honey',
  'Horseradish sauce',
  'Hot chocolate powder',
  'Ice cream, vanilla',
  'Ice cream, strawberry',
  'Ice cream, chocolate',
  'Instant noodle snacks',
  'Jams',
  'Ketchup',
  'Kiwi fruit',
  'Lager, canned',
  'Lamb, leg, raw',
  'Lamb, shoulder, raw',
  'Leek, raw',
  'Lemons',
  'Lentils, dried',
  'Lettuce, average',
  'Limes',
  'Mackerel, canned',
  'Mangetout, raw',   
  'Margarine',
  'Marmalade',
  'Marzipan',
  'Mayonnaise',
  'Milk, whole',
  'Milk, semi-skimmed',
  'Milk, skimmed',
  'Mince pies',
  'Mints, sweets',
  'Muesli, no added sugar', 
  'Mushrooms, raw',
  'Mussels, raw',
  'Mustard',
  'Noodles, dried',
  'Nuts, peanuts',
  'Nuts, cashew',
  'Nuts, almond',
  'Oats, rolled',
  'Oils, vegetable',
  'Oils, olive',
  'Olives, canned',
  'Onions, raw',
  'Oranges',
  'Pasta, dried',
  'Pasta sauce, jar',
  'Pastry, frozen',
  'Peaches, canned',
  'Peanut butter',
  'Pears, canned',
  'Peas, frozen',
  'Peas, canned',
  'Pepper, black',
  'Peppers, raw',
  'Pickles',
  'Pies, meat',
  'Pies, fruit',
  'Pineapple, canned',
  'Pineapple, fresh',
  'Pistachios',
  'Pizza, refrigerated',
  'Plums, raw',
  'Porridge oats',
  'Pork sausages',
  'Pork pies',
  'Potatoes, old, raw',
  'Potatoes, new, raw',
  'Prawns, frozen',
  'Prunes, ready to eat',
  'Pulses, dried',
  'Quinoa',
  'Rice, basmati',
  'Rice, easy cook',
  'Rice, long grain',
  'Rocket leaves',   
  'Rye bread',
  'Salad dressing',
  'Salmon, smoked',
  'Sardines, canned',
  'Sausages, pork',
  'Sausages, vegetarian',
  'Scotch broth, dried',
  'Seeds, sunflower',
  'Seeds, pumpkin',
  'Self-raising flour',
  'Semi-skimmed milk',
  'Sesame seeds',
  'Smoked fish, mackerel',
  'Smoked meat, ham',
  'Smoked salmon',
  'Soup, canned',
  'Soya milk',
  'Spaghetti, dried',
  'Spices, garlic powder',
  'Spices, cinnamon', 
  'Spinach, raw',
  'Spring greens, raw',
  'Spring onions',
  'Sprouts, alfalfa',
  'Squash, butternut',
  'Stews, canned',
  'Stilton cheese',
  'Strawberries', 
  'Sugar, white',
  'Sugar, brown',
  'Sultanas',
  'Sunflower oil',
  'Sunflower seeds',
  'Swede, raw',
  'Sweetcorn, canned',
  'Sweetcorn, frozen',
  'Sweets, liquorice allsorts',
  'Sweets, jelly babies',
  'Sweets, mints',
  'Swede, raw',
  'Tea bags',
  'Tinned tomatoes',
  'Tinned tuna', 
  'Tofu',
  'Tomatoes, raw',   
  'Tomatoes, canned',
  'Tuna, canned',
  'Turkey, whole', 
  'Turmeric',
  'Vegetables, frozen mix',
  'Vegetable oil',
  'Vegetarian sausages',
  'Vinegar, balsamic',
  'Walnuts',    
  'Water, still',
  'Wheat biscuits',
  'Wheatgerm',
  'Whiting, raw',
  'Wine, red',
  'Wine, white',
  'Yeast, dried',
  'Yogurt, fruit',
  'Yogurt, plain',
  'Yogurt, Greek'
]


In [3]:
len(food_list)

252

In [4]:
food_list2 = [
  'Apricots, dried',
  'Apricots, canned',
  'Baking fat, hard margarine',
  'Beef, corned, canned', 
  'Beef, minced',
  'Beef, roast',
  'Beer, lager',
  'Biscuits, chocolate coated',
  'Biscuits, cream filled',
  'Bread, baguettes',
  'Bread, wraps',
  'Buns, burger',
  'Buns, hot dog',
  'Butter, salted',
  'Cake bars, snack',
  'Cake, madeira, iced',
  'Cake, sponge',
  'Cereal bars',
  'Cereal, shredded wheat',
  'Cereal, crunchy nut cornflakes',
  'Cheese, cheddar, reduced fat',
  'Cheese, cream cheese',
  'Cheese, goat cheese',
  'Cheese, halloumi',
  'Cheese, ricotta',
  'Chicken, boneless breasts',
  'Chicken, drumsticks',
  'Chicken, thigh', 
  'Chicken, turkey, deli meat',
  'Chickpeas, dried',
  'Chocolate bars',
  'Chutney',
  'Coleslaw, prepackaged',
  'Cookies, oat',
  'Cous cous, flavored, plain',
  'Crackers, savory', 
  'Crisps, tortilla',
  'Croissants',
  'Curry paste',
  'Dips, hummus',
  'Dips, salsa',
  'Duck, boneless breasts',
  'Eggs, free range',
  'Filo pastry',
  'Fish, batter',
  'Fish, breaded',
  'Fish, pollock', 
  'Fish, prawns',
  'Fish, smoked haddock',
  'Fish, tuna, canned in oil',
  'Fish, whitefish fillets', 
  'Flour, whole wheat', 
  'Fruit, canned',
  'Fruit, dried',
  'Fruit, prepared',
  'Gelatine, powder',
  'Gherkins',
  'Gravy mix',
  'Ham, sliced',
  'Herbs, dried mixed',
  'Honey, clear',
  'Hummus',
  'Ice cream cones',
  'Jelly, powder',
  'Juices, fruit',
  'Ketchup, reduced sugar',
  'Kitchen towels', 
  'Lamb, diced for stew',
  'Lamb, minced', 
  'Lemonade',
  'Lentils, green, dried',
  'Lentils, red, dried',
  'Lettuce, bagged salad',
  'Lollipops',
  'Meat, deli sliced', 
  'Milk, almond', 
  'Milk, coconut',
  'Milk, lactose free',
  'Mincemeat',
  'Muesli, no added sugar',
  'Muesli, sweetened',
  'Mushrooms, chestnut, dried',
  'Mushrooms, oyster, dried', 
  'Mushrooms, shiitake, dried',
  'Mustard, wholegrain',
  'Noodles, fresh',
  'Nuts, mixed',
  'Oil, canola',
  'Oil, coconut',
  'Oil, corn',
  'Oil, sunflower',
  'Olives, black',
  'Pasta, fresh filled',
  'Pasta, gluten-free',
  'Pasta, lasagna sheets',
  'Pasta, wholewheat',
  'Pastry, shortcrust',
  'Pastry, sweet shortcrust',
  'Peanut butter, crunchy',
  'Peanut butter, smooth',
  'Peas, frozen',
  'Peppers, assorted',
  'Pickles, pickled onions',
  'Pies, pork pies',
  'Pineapple, chunks, canned',
  'Pineapple, crushed, canned',
  'Pizza, fresh',
  'Pizza, frozen',
  'Popcorn',
  'Pork, baby back ribs',
  'Pork, chops',
  'Pork, diced',
  'Pork, leg steaks', 
  'Pork, loin chops',
  'Pork, shoulder',
  'Potatoes, baked',
  'Potatoes, mashed',
  'Prawns, cooked',
  'Prawns, raw', 
  'Quiche',
  'Rice, arborio', 
  'Rice, brown',
  'Rice, microwave pouches',
  'Rice, white',
  'Salmon, smoked, sliced',
  'Salsa', 
  'Sardines, in oil or sauce',
  'Sauces, pasta',
  'Sausages, beef', 
  'Sausages, turkey',
  'Seasonings, mixed',
  'Seeds, chia',  
  'Seeds, sesame',
  'Soup, cup of',
  'Soup, instant sachets',
  'Soy sauce',
  'Spices, allspice',
  'Spices, paprika',
  'Spreads, chocolate', 
  'Spreads, honey',
  'Spreads, jam', 
  'Spreads, yeast extract',
  'Squash, butternut',
  'Sugar, caster', 
  'Sugar, icing',
  'Sugar, muscovado', 
  'Sultanas, golden',
  'Sweetener, granulated',
  'Sweetener, tablets',
  'Sweets, boiled',
  'Sweets, candy',
  'Sweets, chewing gum', 
  'Sweets, fudge',
  'Sweets, gummies',
  'Sweets, lollipops',
  'Tea, black',
  'Tea, camomile',
  'Tea, decaffeinated',
  'Tea, fruit', 
  'Tea, green',
  'Toilet paper',
  'Tomatoes, cherry', 
  'Tomatoes, plum',
  'Tomatoes, sun dried',
  'Tortillas, corn', 
  'Tortillas, flour', 
  'Tuna, canned in spring water',
  'Vinegar, white wine', 
  'Vinegar, red wine',
  'Water, carbonated',
  'Water, flavored',
  'Yeast, fresh', 
  'Yogurt, frozen',
  'Yogurt, low fat varieties',
  'Yogurt, organic',
  'Yogurt, soy',
]

In [5]:
len(food_list2)

176

In [6]:
176 + 252

428

In [7]:
newlist = food_list + food_list2

In [8]:
mFood = pd.read_csv(r"C:\Users\medekar\Desktop\Product_Weight_Project\Data\Raw Data\Labelling2021_watercress2.csv", index_col = 0)

In [9]:
mFood.head()

Unnamed: 0,Food Code,Food Name,Description,Food sub-group codes,Previous,Main data references,Footnote,Energy (kJ),Energy (kcal),Fat (g),Saturates (g),Carbohydrate (g),Sugars (g),Starch (g),Fibre (g),Protein (g),Salt (g),FoodGroup
0,13-145,"Ackee, canned, drained",8 cans,DG,554.0,"MW4, 1978; and Vegetables, Herbs and Spices Su...",,639,155,15.2,4.44,0.8,0.8,0,1.8,2.9,0.6,Vegetables
1,13-146,"Agar, dried",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,721,180,1.2,0.3,0.0,0.0,0,81.1,1.6,0.28,Vegetables
2,13-147,"Agar, dried, soaked and drained",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,127,32,0.1,0.0,0.0,0.0,0,15.0,0.2,0.03,Vegetables
3,13-148,"Alfalfa sprouts, raw",Analytical and literature sources,DG,,"Vegetables, Herbs and Spices Supplement, 1991",,113,27,0.7,0.07,0.3,0.3,0,1.7,4.0,0.02,Vegetables
4,13-801,"Allspice, ground",Literature sources,H,,Marsh et al. (1977) Composition of foods: spic...,,1536,366,8.7,2.5,55.1,55.1,0,21.6,6.1,0.19,Herbs and spices


In [10]:
full_list =  [
 'Ackee, canned',
 'Agar, dried',
 'Almonds, whole',
 'Amaranth leaves, raw',
 'Anchovies, canned',
 'Apple juice, ambient',
 'Apples, cooking, raw',
 'Apples, eating, raw',
 'Apricots, raw',
 'Arrowroot',
 'Artichoke, raw',
 'Asparagus, raw',
 'Aubergine, raw',
 'Avocado, raw',
 'Bacon, back rashers, raw',
 'Bagels, plain',
 'Baked beans, canned',
 'Baking powder',
 'Bananas, raw',
 'Barley, pearl, raw',
 'Basil, dried',
 'Beans, butter, canned',
 'Beef, braising steak, raw',
 'Beef, fillet steak, raw',
 'Beetroot, raw',
 'Biscuits, digestives',
 'Biscuits, shortbread',
 'Blackberries, raw',
 'Blackcurrants, raw',
 'Bread, naan',
 'Bread, pitta',
 'Bread, soda',
 'Bread, tortilla, wheat',
 'Bread, white',
 'Bread, wholemeal',
 'Broccoli, raw',
 'Brussels sprouts, raw',
 'Bubble and squeak, homemade',
 'Bulgur wheat, raw',
 'Butter',
 'Butter, spreadable',
 'Cabbage, raw',
 'Cake, madeira',
 'Cake, fruit',
 'Cake, chocolate fudge',
 'Canned fish, tuna',
 'Canned fish, sardines',
 'Canned meat, corned beef',
 'Canned vegetables, sweetcorn',
 'Canned vegetables, carrots',
 'Canned vegetables, peas',
 'Carrots, raw',
 'Cauliflower, raw',
 'Celery, raw',
 'Cereal, cornflakes',
 'Cereal, muesli',
 'Cereal, porridge oats',
 'Cheddar cheese',
 'Cheese, brie',
 'Cheese, feta',
 'Cheese, mozzarella',
 'Cheese, parmesan',
 'Cherries, raw',
 'Chicken, roast',
 'Chicken, stir-fry strips',
 'Chickpeas, canned',
 'Chocolate, milk',
 'Chocolate, dark',
 'Chocolate, white',
 'Coconut, desiccated',
 'Coconut milk',
 'Coffee, instant',
 'Cola drink',
 'Cookies, chocolate chip',
 'Cookies, shortbread',
 'Courgettes, raw',
 'Couscous, plain',
 'Crackers, wholemeal',
 'Crisps, potato',
 'Cucumber, raw',
 'Custard, canned',
 'Dates, ready to eat',
 'Drinking chocolate powder',
 'Dried fruit, raisins',
 'Dried fruit, prunes',
 'Dried fruit, apricots',
 'Duck, roast',
 'Edam cheese',
 'Eggs',
 'Fajita kits',
 'Falafel mix',
 'Figs, ready to eat',
 'Fish fingers, cod',
 'Flour, plain',
 'Flour, self-raising',
 'Fresh fish, salmon',
 'Fresh fish, cod',
 'Fresh fish, haddock',
 'Fresh meat, beef',
 'Fresh meat, lamb',
 'Fresh meat, pork',
 'Fresh meat, chicken',
 'Fruit juices',
 'Fruit squash',
 'Fruits, satsumas',
 'Fruits, plums',
 'Garam masala',
 'Gherkins, pickled',
 'Ginger, ground',
 'Grapes, seedless',
 'Green beans, raw',
 'Herbs, mixed dried',
 'Herbs, fresh basil',
 'Herbs, fresh parsley',
 'Honey',
 'Horseradish sauce',
 'Hot chocolate powder',
 'Ice cream, vanilla',
 'Ice cream, strawberry',
 'Ice cream, chocolate',
 'Instant noodle snacks',
 'Jams',
 'Ketchup',
 'Kiwi fruit',
 'Lager, canned',
 'Lamb, leg, raw',
 'Lamb, shoulder, raw',
 'Leek, raw',
 'Lemons',
 'Lentils, dried',
 'Lettuce, average',
 'Limes',
 'Mackerel, canned',
 'Mangetout, raw',
 'Margarine',
 'Marmalade',
 'Marzipan',
 'Mayonnaise',
 'Milk, whole',
 'Milk, semi-skimmed',
 'Milk, skimmed',
 'Mince pies',
 'Mints, sweets',
 'Muesli, no added sugar',
 'Mushrooms, raw',
 'Mussels, raw',
 'Mustard',
 'Noodles, dried',
 'Nuts, peanuts',
 'Nuts, cashew',
 'Nuts, almond',
 'Oats, rolled',
 'Oils, vegetable',
 'Oils, olive',
 'Olives, canned',
 'Onions, raw',
 'Oranges',
 'Pasta, dried',
 'Pasta sauce, jar',
 'Pastry, frozen',
 'Peaches, canned',
 'Peanut butter',
 'Pears, canned',
 'Peas, frozen',
 'Peas, canned',
 'Pepper, black',
 'Peppers, raw',
 'Pickles',
 'Pies, meat',
 'Pies, fruit',
 'Pineapple, canned',
 'Pineapple, fresh',
 'Pistachios',
 'Pizza, refrigerated',
 'Plums, raw',
 'Porridge oats',
 'Pork sausages',
 'Pork pies',
 'Potatoes, old, raw',
 'Potatoes, new, raw',
 'Prawns, frozen',
 'Prunes, ready to eat',
 'Pulses, dried',
 'Quinoa',
 'Rice, basmati',
 'Rice, easy cook',
 'Rice, long grain',
 'Rocket leaves',
 'Rye bread',
 'Salad dressing',
 'Salmon, smoked',
 'Sardines, canned',
 'Sausages, pork',
 'Sausages, vegetarian',
 'Scotch broth, dried',
 'Seeds, sunflower',
 'Seeds, pumpkin',
 'Self-raising flour',
 'Semi-skimmed milk',
 'Sesame seeds',
 'Smoked fish, mackerel',
 'Smoked meat, ham',
 'Smoked salmon',
 'Soup, canned',
 'Soya milk',
 'Spaghetti, dried',
 'Spices, garlic powder',
 'Spices, cinnamon',
 'Spinach, raw',
 'Spring greens, raw',
 'Spring onions',
 'Sprouts, alfalfa',
 'Squash, butternut',
 'Stews, canned',
 'Stilton cheese',
 'Strawberries',
 'Sugar, white',
 'Sugar, brown',
 'Sultanas',
 'Sunflower oil',
 'Sunflower seeds',
 'Swede, raw',
 'Sweetcorn, canned',
 'Sweetcorn, frozen',
 'Sweets, liquorice allsorts',
 'Sweets, jelly babies',
 'Sweets, mints',
 'Swede, raw',
 'Tea bags',
 'Tinned tomatoes',
 'Tinned tuna',
 'Tofu',
 'Tomatoes, raw',
 'Tomatoes, canned',
 'Tuna, canned',
 'Turkey, whole',
 'Turmeric',
 'Vegetables, frozen mix',
 'Vegetable oil',
 'Vegetarian sausages',
 'Vinegar, balsamic',
 'Walnuts',
 'Water, still',
 'Wheat biscuits',
 'Wheatgerm',
 'Whiting, raw',
 'Wine, red',
 'Wine, white',
 'Yeast, dried',
 'Yogurt, fruit',
 'Yogurt, plain',
 'Yogurt, Greek',
 'Apricots, dried',
 'Apricots, canned',
 'Baking fat, hard margarine',
 'Beef, corned, canned',
 'Beef, minced',
 'Beef, roast',
 'Beer, lager',
 'Biscuits, chocolate coated',
 'Biscuits, cream filled',
 'Bread, baguettes',
 'Bread, wraps',
 'Buns, burger',
 'Buns, hot dog',
 'Butter, salted',
 'Cake bars, snack',
 'Cake, madeira, iced',
 'Cake, sponge',
 'Cereal bars',
 'Cereal, shredded wheat',
 'Cereal, crunchy nut cornflakes',
 'Cheese, cheddar, reduced fat',
 'Cheese, cream cheese',
 'Cheese, goat cheese',
 'Cheese, halloumi',
 'Cheese, ricotta',
 'Chicken, boneless breasts',
 'Chicken, drumsticks',
 'Chicken, thigh',
 'Chicken, turkey, deli meat',
 'Chickpeas, dried',
 'Chocolate bars',
 'Chutney',
 'Coleslaw, prepackaged',
 'Cookies, oat',
 'Cous cous, flavored, plain',
 'Crackers, savory',
 'Crisps, tortilla',
 'Croissants',
 'Curry paste',
 'Dips, hummus',
 'Dips, salsa',
 'Duck, boneless breasts',
 'Eggs, free range',
 'Filo pastry',
 'Fish, batter',
 'Fish, breaded',
 'Fish, pollock',
 'Fish, prawns',
 'Fish, smoked haddock',
 'Fish, tuna, canned in oil',
 'Fish, whitefish fillets',
 'Flour, whole wheat',
 'Fruit, canned',
 'Fruit, dried',
 'Fruit, prepared',
 'Gelatine, powder',
 'Gherkins',
 'Gravy mix',
 'Ham, sliced',
 'Herbs, dried mixed',
 'Honey, clear',
 'Hummus',
 'Ice cream cones',
 'Jelly, powder',
 'Juices, fruit',
 'Ketchup, reduced sugar',
 'Kitchen towels',
 'Lamb, diced for stew',
 'Lamb, minced',
 'Lemonade',
 'Lentils, green, dried',
 'Lentils, red, dried',
 'Lettuce, bagged salad',
 'Lollipops',
 'Meat, deli sliced',
 'Milk, almond',
 'Milk, coconut',
 'Milk, lactose free',
 'Mincemeat',
 'Muesli, no added sugar',
 'Muesli, sweetened',
 'Mushrooms, chestnut, dried',
 'Mushrooms, oyster, dried',
 'Mushrooms, shiitake, dried',
 'Mustard, wholegrain',
 'Noodles, fresh',
 'Nuts, mixed',
 'Oil, canola',
 'Oil, coconut',
 'Oil, corn',
 'Oil, sunflower',
 'Olives, black',
 'Pasta, fresh filled',
 'Pasta, gluten-free',
 'Pasta, lasagna sheets',
 'Pasta, wholewheat',
 'Pastry, shortcrust',
 'Pastry, sweet shortcrust',
 'Peanut butter, crunchy',
 'Peanut butter, smooth',
 'Peas, frozen',
 'Peppers, assorted',
 'Pickles, pickled onions',
 'Pies, pork pies',
 'Pineapple, chunks, canned',
 'Pineapple, crushed, canned',
 'Pizza, fresh',
 'Pizza, frozen',
 'Popcorn',
 'Pork, baby back ribs',
 'Pork, chops',
 'Pork, diced',
 'Pork, leg steaks',
 'Pork, loin chops',
 'Pork, shoulder',
 'Potatoes, baked',
 'Potatoes, mashed',
 'Prawns, cooked',
 'Prawns, raw',
 'Quiche',
 'Rice, arborio',
 'Rice, brown',
 'Rice, microwave pouches',
 'Rice, white',
 'Salmon, smoked, sliced',
 'Salsa',
 'Sardines, in oil or sauce',
 'Sauces, pasta',
 'Sausages, beef',
 'Sausages, turkey',
 'Seasonings, mixed',
 'Seeds, chia',
 'Seeds, sesame',
 'Soup, cup of',
 'Soup, instant sachets',
 'Soy sauce',
 'Spices, allspice',
 'Spices, paprika',
 'Spreads, chocolate',
 'Spreads, honey',
 'Spreads, jam',
 'Spreads, yeast extract',
 'Squash, butternut',
 'Sugar, caster',
 'Sugar, icing',
 'Sugar, muscovado',
 'Sultanas, golden',
 'Sweetener, granulated',
 'Sweetener, tablets',
 'Sweets, boiled',
 'Sweets, candy',
 'Sweets, chewing gum',
 'Sweets, fudge',
 'Sweets, gummies',
 'Sweets, lollipops',
 'Tea, black',
 'Tea, camomile',
 'Tea, decaffeinated',
 'Tea, fruit',
 'Tea, green',
 'Toilet paper',
 'Tomatoes, cherry',
 'Tomatoes, plum',
 'Tomatoes, sun dried',
 'Tortillas, corn',
 'Tortillas, flour',
 'Tuna, canned in spring water',
 'Vinegar, white wine',
 'Vinegar, red wine',
 'Water, carbonated',
 'Water, flavored',
 'Yeast, fresh',
 'Yogurt, frozen',
 'Yogurt, low fat varieties',
 'Yogurt, organic',
 'Yogurt, soy'
]

In [15]:
%%time
# Create a DataFrame from the full_list
food_df = pd.DataFrame({'Food Name': full_list})

# Create a new column in mFood to indicate matches
mFood['Matches'] = ""
mFood['Similarity Score'] = ""

# Loop through the food names and perform fuzzy matching
for food_name in food_df['Food Name']:
    best_similarity = 0
    best_match = None

    for index, row in mFood.iterrows():
        similarity = fuzz.partial_ratio(food_name, row['Food Name'])
        if similarity > best_similarity:
            best_similarity = similarity
            best_match = row['Food Name']

    if best_match is not None:
        mFood.loc[mFood['Food Name'] == best_match, 'Matches'] += f"{food_name}, "
        mFood.loc[mFood['Food Name'] == best_match, 'Similarity Score'] = best_similarity

CPU times: total: 2min 14s
Wall time: 2min 14s


In [16]:
pd.set_option('display.max_rows', None)
mFood[mFood['Matches'] != '']#.count()

Unnamed: 0,Food Code,Food Name,Description,Food sub-group codes,Previous,Main data references,Footnote,Energy (kJ),Energy (kcal),Fat (g),Saturates (g),Carbohydrate (g),Sugars (g),Starch (g),Fibre (g),Protein (g),Salt (g),FoodGroup,Matches,Similarity Score
0,13-145,"Ackee, canned, drained",8 cans,DG,554,"MW4, 1978; and Vegetables, Herbs and Spices Su...",,639,155,15.2,4.44,0.8,0.8,0,1.8,2.9,0.6,Vegetables,"Ackee, canned,",100
1,13-146,"Agar, dried",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,721,180,1.2,0.3,0,0,0,81.1,1.6,0.28,Vegetables,"Agar, dried,",100
2,13-147,"Agar, dried, soaked and drained",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,127,32,0.1,0,0,0,0,15,0.2,0.03,Vegetables,"Fish, smoked haddock,",60
3,13-148,"Alfalfa sprouts, raw",Analytical and literature sources,DG,,"Vegetables, Herbs and Spices Supplement, 1991",,113,27,0.7,0.07,0.3,0.3,0,1.7,4.0,0.02,Vegetables,"Nuts, cashew,",67
8,14-896,"Almonds, whole kernels","Literature sources, Prunus dulcis",GA,,"USDA SR28, 2015",,2466,596,49.9,3.8,5,4.3,0.7,12.5,25.6,0,Nuts and seeds,"Almonds, whole,",100
10,13-149,"Amaranth leaves, raw",Literature sources,DG,,Wu Leung et al. (1972) Food composition table ...,,111,27,0.3,0.1,0.3,0.2,0.1,4.4,3.5,0.03,Vegetables,"Amaranth leaves, raw,",100
12,16-448,"Anchovies, canned in oil, drained","10 samples, 4 brands",JC,16-323,"Data from Fish and Fish Products Supplement, 1...",,798,191,10.0,1.6,0,0,0,0,25.2,14.68,Fish and fish products,"Anchovies, canned, Olives, canned, Fish, tuna,...",68
21,14-362,"Apples, cooking, raw, flesh only, peeled",Bramley apples and unspecified. Analytical dat...,F,14-002,"LGC, Nutritional composition of fresh fruit, 1...",,187,44,0.3,0.06,8.9,8.9,0,1.7,0.3,0.01,Fruit,"Apples, cooking, raw,",100
26,14-319,"Apples, eating, raw, flesh and skin","22 samples, autumn and winter, UK grown and im...",FA,14-012,"DH, Nutrient analysis of fruit and vegetables,...",,248,59,0.5,0.12,11.5,11.5,0,1.2,0.6,0,Fruit,"Apples, eating, raw,",100
28,14-031,"Apricots, dried",No stones,FA,,"LGC, Nutritional composition of fruit products...",,1027,242,0.7,0.18,42.8,42.8,0,7.7,4.8,0.14,Fruit,"Apricots, dried,",100
